diff --git "a/profile_trace/iteration_22528/rank0_trace.json" "b/profile_trace/iteration_22528/rank0_trace.json" new file mode 100644--- /dev/null +++ "b/profile_trace/iteration_22528/rank0_trace.json" @@ -0,0 +1,157189 @@ + +{ + "schemaVersion": 1, + "deviceProperties": [ + { + "id": 0, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 1, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 2, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 3, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 4, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 5, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 6, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 7, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + } + ], + "cupti_version": 22, + "cuda_runtime_version": 12040, + "cuda_driver_version": 12080, + "distributedInfo": {"backend": "nccl", "rank": 0, "world_size": 8, "pg_count": 1, "pg_config": [{"pg_name": "0", "pg_desc": "default_pg", "backend_config": "cuda:nccl", "pg_size": 8, "ranks": [0, 1, 2, 3, 4, 5, 6, 7]}], "nccl_version": "2.21.5"}, + "record_shapes": 1, + "trace_id": "DE0ED2D03D78453BBD518D614D24661B", + "traceEvents": [ + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937202805.192, "dur": 157.492, + "args": { + "External id": 977409,"Record function id": 0, "Sequence number": 10552468, "Fwd thread id": 1, "Ev Idx": 0 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937202850.245, "dur": 100.959, + "args": { + "External id": 977410,"Sequence number": 10552468, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 1 + } + }, + { + "ph": "f", "id": 1, "pid": 2338706, "tid": 2379422, "ts": 6345937202850.245, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338706, "tid": 2379422, + "ts": 6345937202860.106, "dur": 88.277, + "args": { + "External id": 977411,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 2 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937202975.760, "dur": 326.333, + "args": { + "External id": 977412,"Record function id": 0, "Ev Idx": 3 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937203099.420, "dur": 106.980, + "args": { + "External id": 977413,"Record function id": 0, "Ev Idx": 4 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.25", "pid": 2338706, "tid": 2379422, + "ts": 6345937203136.355, "dur": 58.243, + "args": { + "External id": 977414,"Record function id": 0, "Ev Idx": 5 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937203212.115, "dur": 2.184, + "args": { + "External id": 977415,"Sequence number": 10552467, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 6 + } + }, + { + "ph": "f", "id": 2, "pid": 2338706, "tid": 2379422, "ts": 6345937203212.115, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937203219.730, "dur": 75.661, + "args": { + "External id": 977416,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 7 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937203242.118, "dur": 52.598, + "args": { + "External id": 977417,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 8 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937203254.684, "dur": 4.104, + "args": { + "External id": 977418,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937203314.125, "dur": 36509.397, + "args": { + "External id": 977419,"Record function id": 0, "Sequence number": 10552465, "Fwd thread id": 1, "Ev Idx": 10 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937203316.278, "dur": 36490.485, + "args": { + "External id": 977420,"Sequence number": 10552465, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11 + } + }, + { + "ph": "f", "id": 3, "pid": 2338706, "tid": 2379422, "ts": 6345937203316.278, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937203366.963, "dur": 4.156, + "args": { + "External id": 977421,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937203374.906, "dur": 36120.130, + "args": { + "External id": 977422,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937203381.239, "dur": 36113.470, + "args": { + "External id": 977423,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 14 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937203388.166, "dur": 7.612, + "args": { + "External id": 977424,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937203398.052, "dur": 36095.255, + "args": { + "External id": 977425,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338706, "tid": 2379422, + "ts": 6345937239500.422, "dur": 0.572, + "args": { + "External id": 977426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 17 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338706, "tid": 2379422, + "ts": 6345937239504.041, "dur": 3.375, + "args": { + "External id": 977427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 18 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338706, "tid": 2379422, + "ts": 6345937239506.084, "dur": 1.189, + "args": { + "External id": 977428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2379422, + "ts": 6345937239514.835, "dur": 30.833, + "args": { + "External id": 977429,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 20 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338706, "tid": 2379422, + "ts": 6345937239555.410, "dur": 49.082, + "args": { + "External id": 977430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2379422, + "ts": 6345937239557.939, "dur": 46.351, + "args": { + "External id": 977431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 22 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2379422, + "ts": 6345937239560.025, "dur": 43.940, + "args": { + "External id": 977432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 23 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937239838.098, "dur": 23.317, + "args": { + "External id": 977433,"Record function id": 0, "Sequence number": 10552464, "Fwd thread id": 1, "Ev Idx": 24 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937239840.712, "dur": 16.830, + "args": { + "External id": 977434,"Sequence number": 10552464, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 25 + } + }, + { + "ph": "f", "id": 4, "pid": 2338706, "tid": 2379422, "ts": 6345937239840.712, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937239846.308, "dur": 10.968, + "args": { + "External id": 977435,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 26 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937239851.372, "dur": 5.656, + "args": { + "External id": 977436,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 27 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937239868.580, "dur": 118.859, + "args": { + "External id": 977437,"Record function id": 0, "Sequence number": 10552463, "Fwd thread id": 1, "Ev Idx": 28 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937239869.856, "dur": 109.428, + "args": { + "External id": 977438,"Sequence number": 10552463, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 29 + } + }, + { + "ph": "f", "id": 5, "pid": 2338706, "tid": 2379422, "ts": 6345937239869.856, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937239874.701, "dur": 103.751, + "args": { + "External id": 977439,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 30 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345937239880.468, "dur": 40.867, + "args": { + "External id": 977440,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 31 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937239885.768, "dur": 7.322, + "args": { + "External id": 977441,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 32 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345937239895.244, "dur": 25.780, + "args": { + "External id": 977442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 33 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345937239901.419, "dur": 19.120, + "args": { + "External id": 977443,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 34 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937239924.628, "dur": 7.844, + "args": { + "External id": 977444,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 35 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937239927.585, "dur": 4.539, + "args": { + "External id": 977445,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 36 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937239934.031, "dur": 43.479, + "args": { + "External id": 977446,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 37 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937239992.459, "dur": 137.561, + "args": { + "External id": 977447,"Record function id": 0, "Sequence number": 10552462, "Fwd thread id": 1, "Ev Idx": 38 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937239993.457, "dur": 130.658, + "args": { + "External id": 977448,"Sequence number": 10552462, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 39 + } + }, + { + "ph": "f", "id": 6, "pid": 2338706, "tid": 2379422, "ts": 6345937239993.457, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937239997.624, "dur": 126.006, + "args": { + "External id": 977449,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "3"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 40 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345937240003.735, "dur": 39.449, + "args": { + "External id": 977450,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 41 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937240005.160, "dur": 17.426, + "args": { + "External id": 977451,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 42 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345937240024.295, "dur": 18.613, + "args": { + "External id": 977452,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 43 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345937240028.539, "dur": 13.985, + "args": { + "External id": 977453,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 44 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2379422, + "ts": 6345937240045.129, "dur": 51.095, + "args": { + "External id": 977454,"Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 45 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937240051.948, "dur": 42.562, + "args": { + "External id": 977455,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 46 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937240097.965, "dur": 24.776, + "args": { + "External id": 977456,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 47 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937240136.553, "dur": 236.391, + "args": { + "External id": 977457,"Record function id": 0, "Sequence number": 10552461, "Fwd thread id": 1, "Ev Idx": 48 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937240137.804, "dur": 230.455, + "args": { + "External id": 977458,"Sequence number": 10552461, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 49 + } + }, + { + "ph": "f", "id": 7, "pid": 2338706, "tid": 2379422, "ts": 6345937240137.804, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937240143.092, "dur": 224.765, + "args": { + "External id": 977459,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 50 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345937240147.488, "dur": 26.799, + "args": { + "External id": 977460,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 51 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937240148.486, "dur": 4.415, + "args": { + "External id": 977461,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 52 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345937240154.436, "dur": 19.542, + "args": { + "External id": 977462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 53 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345937240155.853, "dur": 17.731, + "args": { + "External id": 977463,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 54 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937240175.696, "dur": 2.632, + "args": { + "External id": 977464,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 55 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937240177.125, "dur": 0.908, + "args": { + "External id": 977465,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 56 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937240181.689, "dur": 185.151, + "args": { + "External id": 977466,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 57 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937240377.886, "dur": 86.223, + "args": { + "External id": 977467,"Record function id": 0, "Sequence number": 10552460, "Fwd thread id": 1, "Ev Idx": 58 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937240379.539, "dur": 80.309, + "args": { + "External id": 977468,"Sequence number": 10552460, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 59 + } + }, + { + "ph": "f", "id": 8, "pid": 2338706, "tid": 2379422, "ts": 6345937240379.539, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937240381.325, "dur": 78.163, + "args": { + "External id": 977469,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 60 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345937240385.041, "dur": 16.334, + "args": { + "External id": 977470,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 61 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937240385.741, "dur": 2.431, + "args": { + "External id": 977471,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 62 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345937240388.712, "dur": 12.418, + "args": { + "External id": 977472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 63 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345937240389.805, "dur": 10.947, + "args": { + "External id": 977473,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 64 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937240402.456, "dur": 4.632, + "args": { + "External id": 977474,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 65 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937240406.431, "dur": 0.430, + "args": { + "External id": 977475,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 66 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937240408.144, "dur": 50.445, + "args": { + "External id": 977476,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 67 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937240471.899, "dur": 46.660, + "args": { + "External id": 977477,"Record function id": 0, "Sequence number": 10552459, "Fwd thread id": 1, "Ev Idx": 68 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937240474.040, "dur": 1.517, + "args": { + "External id": 977478,"Sequence number": 10552459, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 69 + } + }, + { + "ph": "f", "id": 9, "pid": 2338706, "tid": 2379422, "ts": 6345937240474.040, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937240478.715, "dur": 34.600, + "args": { + "External id": 977479,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 70 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937240481.153, "dur": 31.588, + "args": { + "External id": 977480,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 71 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937240492.699, "dur": 2.552, + "args": { + "External id": 977481,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 72 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937240524.128, "dur": 2357.812, + "args": { + "External id": 977482,"Record function id": 0, "Sequence number": 10552457, "Fwd thread id": 1, "Ev Idx": 73 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937240525.787, "dur": 2314.934, + "args": { + "External id": 977483,"Sequence number": 10552457, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 74 + } + }, + { + "ph": "f", "id": 10, "pid": 2338706, "tid": 2379422, "ts": 6345937240525.787, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937240568.019, "dur": 2.674, + "args": { + "External id": 977484,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 75 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937240573.379, "dur": 2038.050, + "args": { + "External id": 977485,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 76 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937240575.358, "dur": 2035.711, + "args": { + "External id": 977486,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 77 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937240578.348, "dur": 6.017, + "args": { + "External id": 977487,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 78 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937240585.499, "dur": 2024.497, + "args": { + "External id": 977488,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 79 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338706, "tid": 2379422, + "ts": 6345937242615.292, "dur": 0.412, + "args": { + "External id": 977489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 80 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338706, "tid": 2379422, + "ts": 6345937242617.346, "dur": 2.804, + "args": { + "External id": 977490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 81 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338706, "tid": 2379422, + "ts": 6345937242619.069, "dur": 0.860, + "args": { + "External id": 977491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 82 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2379422, + "ts": 6345937242625.006, "dur": 22.056, + "args": { + "External id": 977492,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 83 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338706, "tid": 2379422, + "ts": 6345937242652.897, "dur": 44.255, + "args": { + "External id": 977493,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 84 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2379422, + "ts": 6345937242656.972, "dur": 39.921, + "args": { + "External id": 977494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 85 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2379422, + "ts": 6345937242658.380, "dur": 38.298, + "args": { + "External id": 977495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 86 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345937242856.339, "dur": 20.494, + "args": { + "External id": 977496,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 87 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937242893.260, "dur": 16.939, + "args": { + "External id": 977497,"Record function id": 0, "Sequence number": 10552456, "Fwd thread id": 1, "Ev Idx": 88 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937242894.978, "dur": 11.738, + "args": { + "External id": 977498,"Sequence number": 10552456, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 89 + } + }, + { + "ph": "f", "id": 11, "pid": 2338706, "tid": 2379422, "ts": 6345937242894.978, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937242900.626, "dur": 5.860, + "args": { + "External id": 977499,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 90 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937242902.160, "dur": 4.134, + "args": { + "External id": 977500,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 91 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937242914.126, "dur": 87.816, + "args": { + "External id": 977501,"Record function id": 0, "Sequence number": 10552455, "Fwd thread id": 1, "Ev Idx": 92 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937242915.736, "dur": 79.777, + "args": { + "External id": 977502,"Sequence number": 10552455, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 93 + } + }, + { + "ph": "f", "id": 12, "pid": 2338706, "tid": 2379422, "ts": 6345937242915.736, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937242918.201, "dur": 76.959, + "args": { + "External id": 977503,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 94 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345937242923.573, "dur": 24.428, + "args": { + "External id": 977504,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 95 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937242925.779, "dur": 3.453, + "args": { + "External id": 977505,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 96 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345937242930.359, "dur": 17.357, + "args": { + "External id": 977506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 97 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345937242932.148, "dur": 15.139, + "args": { + "External id": 977507,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 98 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937242949.680, "dur": 4.358, + "args": { + "External id": 977508,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 99 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937242952.153, "dur": 1.510, + "args": { + "External id": 977509,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937242958.198, "dur": 36.122, + "args": { + "External id": 977510,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937243006.357, "dur": 112.950, + "args": { + "External id": 977511,"Record function id": 0, "Sequence number": 10552454, "Fwd thread id": 1, "Ev Idx": 102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937243021.782, "dur": 92.765, + "args": { + "External id": 977512,"Sequence number": 10552454, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 103 + } + }, + { + "ph": "f", "id": 13, "pid": 2338706, "tid": 2379422, "ts": 6345937243021.782, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937243028.225, "dur": 85.996, + "args": { + "External id": 977513,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "2"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345937243030.153, "dur": 50.658, + "args": { + "External id": 977514,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937243031.535, "dur": 3.694, + "args": { + "External id": 977515,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345937243036.239, "dur": 44.255, + "args": { + "External id": 977516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345937243037.869, "dur": 41.335, + "args": { + "External id": 977517,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2379422, + "ts": 6345937243085.327, "dur": 9.528, + "args": { + "External id": 977518,"Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937243091.153, "dur": 3.205, + "args": { + "External id": 977519,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937243095.873, "dur": 17.621, + "args": { + "External id": 977520,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937243127.165, "dur": 139.303, + "args": { + "External id": 977521,"Record function id": 0, "Sequence number": 10552453, "Fwd thread id": 1, "Ev Idx": 112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937243128.349, "dur": 134.046, + "args": { + "External id": 977522,"Sequence number": 10552453, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 113 + } + }, + { + "ph": "f", "id": 14, "pid": 2338706, "tid": 2379422, "ts": 6345937243128.349, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937243130.332, "dur": 131.667, + "args": { + "External id": 977523,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345937243131.640, "dur": 19.005, + "args": { + "External id": 977524,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937243132.911, "dur": 2.442, + "args": { + "External id": 977525,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345937243136.344, "dur": 14.036, + "args": { + "External id": 977526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345937243137.196, "dur": 12.882, + "args": { + "External id": 977527,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937243154.239, "dur": 5.375, + "args": { + "External id": 977528,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937243158.426, "dur": 0.865, + "args": { + "External id": 977529,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937243160.705, "dur": 100.410, + "args": { + "External id": 977530,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937243271.153, "dur": 105.526, + "args": { + "External id": 977531,"Record function id": 0, "Sequence number": 10552452, "Fwd thread id": 1, "Ev Idx": 122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937243272.383, "dur": 83.078, + "args": { + "External id": 977532,"Sequence number": 10552452, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 123 + } + }, + { + "ph": "f", "id": 15, "pid": 2338706, "tid": 2379422, "ts": 6345937243272.383, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937243276.681, "dur": 78.475, + "args": { + "External id": 977533,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345937243277.750, "dur": 18.166, + "args": { + "External id": 977534,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937243278.921, "dur": 2.123, + "args": { + "External id": 977535,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345937243281.714, "dur": 13.945, + "args": { + "External id": 977536,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345937243282.444, "dur": 12.861, + "args": { + "External id": 977537,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937243296.879, "dur": 2.207, + "args": { + "External id": 977538,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937243298.288, "dur": 0.561, + "args": { + "External id": 977539,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937243302.581, "dur": 51.863, + "args": { + "External id": 977540,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345937243360.270, "dur": 15.019, + "args": { + "External id": 977541,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937243381.824, "dur": 38.399, + "args": { + "External id": 977542,"Record function id": 0, "Sequence number": 10552451, "Fwd thread id": 1, "Ev Idx": 133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937243383.203, "dur": 1.367, + "args": { + "External id": 977543,"Sequence number": 10552451, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 134 + } + }, + { + "ph": "f", "id": 16, "pid": 2338706, "tid": 2379422, "ts": 6345937243383.203, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937243387.947, "dur": 29.874, + "args": { + "External id": 977544,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937243390.321, "dur": 26.899, + "args": { + "External id": 977545,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937243397.705, "dur": 0.647, + "args": { + "External id": 977546,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937243425.118, "dur": 3428.382, + "args": { + "External id": 977547,"Record function id": 0, "Sequence number": 10552449, "Fwd thread id": 1, "Ev Idx": 138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937243428.691, "dur": 3388.535, + "args": { + "External id": 977548,"Sequence number": 10552449, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 139 + } + }, + { + "ph": "f", "id": 17, "pid": 2338706, "tid": 2379422, "ts": 6345937243428.691, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937243464.724, "dur": 3.203, + "args": { + "External id": 977549,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937243470.374, "dur": 3113.007, + "args": { + "External id": 977550,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937243472.433, "dur": 3110.575, + "args": { + "External id": 977551,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937243478.189, "dur": 4.357, + "args": { + "External id": 977552,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937243483.615, "dur": 3098.324, + "args": { + "External id": 977553,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338706, "tid": 2379422, + "ts": 6345937246586.771, "dur": 0.431, + "args": { + "External id": 977554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338706, "tid": 2379422, + "ts": 6345937246588.795, "dur": 5.224, + "args": { + "External id": 977555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338706, "tid": 2379422, + "ts": 6345937246593.076, "dur": 0.799, + "args": { + "External id": 977556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2379422, + "ts": 6345937246598.692, "dur": 31.240, + "args": { + "External id": 977557,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338706, "tid": 2379422, + "ts": 6345937246637.731, "dur": 41.277, + "args": { + "External id": 977558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2379422, + "ts": 6345937246639.339, "dur": 39.459, + "args": { + "External id": 977559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2379422, + "ts": 6345937246640.590, "dur": 37.782, + "args": { + "External id": 977560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345937246830.998, "dur": 17.195, + "args": { + "External id": 977561,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937246864.669, "dur": 14.196, + "args": { + "External id": 977562,"Record function id": 0, "Sequence number": 10552448, "Fwd thread id": 1, "Ev Idx": 153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937246866.355, "dur": 9.026, + "args": { + "External id": 977563,"Sequence number": 10552448, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 154 + } + }, + { + "ph": "f", "id": 18, "pid": 2338706, "tid": 2379422, "ts": 6345937246866.355, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937246869.349, "dur": 5.788, + "args": { + "External id": 977564,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937246870.786, "dur": 4.155, + "args": { + "External id": 977565,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937246882.440, "dur": 79.493, + "args": { + "External id": 977566,"Record function id": 0, "Sequence number": 10552447, "Fwd thread id": 1, "Ev Idx": 157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937246883.648, "dur": 72.718, + "args": { + "External id": 977567,"Sequence number": 10552447, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 158 + } + }, + { + "ph": "f", "id": 19, "pid": 2338706, "tid": 2379422, "ts": 6345937246883.648, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937246888.913, "dur": 67.073, + "args": { + "External id": 977568,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345937246892.276, "dur": 24.683, + "args": { + "External id": 977569,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937246896.786, "dur": 3.439, + "args": { + "External id": 977570,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345937246901.233, "dur": 15.411, + "args": { + "External id": 977571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345937246903.124, "dur": 13.108, + "args": { + "External id": 977572,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937246918.757, "dur": 6.044, + "args": { + "External id": 977573,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937246923.155, "dur": 1.349, + "args": { + "External id": 977574,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937246926.128, "dur": 28.946, + "args": { + "External id": 977575,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937246966.277, "dur": 74.541, + "args": { + "External id": 977576,"Record function id": 0, "Sequence number": 10552446, "Fwd thread id": 1, "Ev Idx": 167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937246967.573, "dur": 69.289, + "args": { + "External id": 977577,"Sequence number": 10552446, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 168 + } + }, + { + "ph": "f", "id": 20, "pid": 2338706, "tid": 2379422, "ts": 6345937246967.573, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937246969.894, "dur": 66.625, + "args": { + "External id": 977578,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345937246972.265, "dur": 20.246, + "args": { + "External id": 977579,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937246973.207, "dur": 2.668, + "args": { + "External id": 977580,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345937246976.819, "dur": 15.446, + "args": { + "External id": 977581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345937246980.678, "dur": 11.251, + "args": { + "External id": 977582,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2379422, + "ts": 6345937246994.399, "dur": 7.433, + "args": { + "External id": 977583,"Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937247000.277, "dur": 1.022, + "args": { + "External id": 977584,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937247002.899, "dur": 32.838, + "args": { + "External id": 977585,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937247046.823, "dur": 155.084, + "args": { + "External id": 977586,"Record function id": 0, "Sequence number": 10552445, "Fwd thread id": 1, "Ev Idx": 177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937247048.373, "dur": 148.426, + "args": { + "External id": 977587,"Sequence number": 10552445, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 178 + } + }, + { + "ph": "f", "id": 21, "pid": 2338706, "tid": 2379422, "ts": 6345937247048.373, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937247050.452, "dur": 145.910, + "args": { + "External id": 977588,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345937247051.842, "dur": 51.486, + "args": { + "External id": 977589,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937247080.068, "dur": 3.796, + "args": { + "External id": 977590,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345937247084.964, "dur": 18.102, + "args": { + "External id": 977591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345937247088.868, "dur": 13.727, + "args": { + "External id": 977592,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937247107.106, "dur": 5.433, + "args": { + "External id": 977593,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937247111.427, "dur": 0.773, + "args": { + "External id": 977594,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937247113.102, "dur": 82.126, + "args": { + "External id": 977595,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937247208.070, "dur": 103.125, + "args": { + "External id": 977596,"Record function id": 0, "Sequence number": 10552444, "Fwd thread id": 1, "Ev Idx": 187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937247209.348, "dur": 82.614, + "args": { + "External id": 977597,"Sequence number": 10552444, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 188 + } + }, + { + "ph": "f", "id": 22, "pid": 2338706, "tid": 2379422, "ts": 6345937247209.348, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937247213.221, "dur": 78.410, + "args": { + "External id": 977598,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345937247214.533, "dur": 17.760, + "args": { + "External id": 977599,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937247215.408, "dur": 4.466, + "args": { + "External id": 977600,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345937247220.374, "dur": 11.674, + "args": { + "External id": 977601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345937247221.158, "dur": 10.485, + "args": { + "External id": 977602,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937247233.743, "dur": 3.864, + "args": { + "External id": 977603,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937247236.584, "dur": 0.787, + "args": { + "External id": 977604,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937247240.582, "dur": 50.218, + "args": { + "External id": 977605,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345937247296.346, "dur": 13.600, + "args": { + "External id": 977606,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937247318.177, "dur": 36.087, + "args": { + "External id": 977607,"Record function id": 0, "Sequence number": 10552443, "Fwd thread id": 1, "Ev Idx": 198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937247319.789, "dur": 1.211, + "args": { + "External id": 977608,"Sequence number": 10552443, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 199 + } + }, + { + "ph": "f", "id": 23, "pid": 2338706, "tid": 2379422, "ts": 6345937247319.789, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937247323.383, "dur": 23.957, + "args": { + "External id": 977609,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937247325.098, "dur": 21.828, + "args": { + "External id": 977610,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937247330.210, "dur": 0.558, + "args": { + "External id": 977611,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937247359.332, "dur": 3439.496, + "args": { + "External id": 977612,"Record function id": 0, "Sequence number": 10552442, "Fwd thread id": 1, "Ev Idx": 203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937247373.141, "dur": 3391.192, + "args": { + "External id": 977613,"Sequence number": 10552442, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 204 + } + }, + { + "ph": "f", "id": 24, "pid": 2338706, "tid": 2379422, "ts": 6345937247373.141, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937247404.486, "dur": 3.254, + "args": { + "External id": 977614,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937247409.763, "dur": 3150.238, + "args": { + "External id": 977615,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937247411.551, "dur": 3148.097, + "args": { + "External id": 977616,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937247416.726, "dur": 3.283, + "args": { + "External id": 977617,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937247420.893, "dur": 3137.671, + "args": { + "External id": 977618,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338706, "tid": 2379422, + "ts": 6345937250563.682, "dur": 0.319, + "args": { + "External id": 977619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338706, "tid": 2379422, + "ts": 6345937250565.405, "dur": 4.940, + "args": { + "External id": 977620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338706, "tid": 2379422, + "ts": 6345937250569.172, "dur": 1.011, + "args": { + "External id": 977621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2379422, + "ts": 6345937250574.566, "dur": 21.313, + "args": { + "External id": 977622,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338706, "tid": 2379422, + "ts": 6345937250600.558, "dur": 40.859, + "args": { + "External id": 977623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2379422, + "ts": 6345937250602.095, "dur": 39.148, + "args": { + "External id": 977624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2379422, + "ts": 6345937250603.389, "dur": 37.554, + "args": { + "External id": 977625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345937250778.131, "dur": 15.642, + "args": { + "External id": 977626,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937250815.832, "dur": 14.743, + "args": { + "External id": 977627,"Record function id": 0, "Ev Idx": 218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937250818.315, "dur": 10.124, + "args": { + "External id": 977628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937250822.395, "dur": 4.841, + "args": { + "External id": 977629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937250823.448, "dur": 3.691, + "args": { + "External id": 977630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937250834.541, "dur": 15.956, + "args": { + "External id": 977631,"Record function id": 0, "Sequence number": 10552441, "Fwd thread id": 1, "Ev Idx": 222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937250835.803, "dur": 11.182, + "args": { + "External id": 977632,"Sequence number": 10552441, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 223 + } + }, + { + "ph": "f", "id": 25, "pid": 2338706, "tid": 2379422, "ts": 6345937250835.803, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937250839.148, "dur": 7.569, + "args": { + "External id": 977633,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937250842.956, "dur": 3.599, + "args": { + "External id": 977634,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937250854.042, "dur": 80.160, + "args": { + "External id": 977635,"Record function id": 0, "Sequence number": 10552440, "Fwd thread id": 1, "Ev Idx": 226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937250855.125, "dur": 72.963, + "args": { + "External id": 977636,"Sequence number": 10552440, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 227 + } + }, + { + "ph": "f", "id": 26, "pid": 2338706, "tid": 2379422, "ts": 6345937250855.125, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937250857.315, "dur": 70.404, + "args": { + "External id": 977637,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345937250863.153, "dur": 28.135, + "args": { + "External id": 977638,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937250865.394, "dur": 6.736, + "args": { + "External id": 977639,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345937250872.930, "dur": 18.034, + "args": { + "External id": 977640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345937250874.100, "dur": 16.401, + "args": { + "External id": 977641,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937250893.031, "dur": 6.688, + "args": { + "External id": 977642,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937250898.162, "dur": 1.184, + "args": { + "External id": 977643,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937250900.702, "dur": 26.245, + "args": { + "External id": 977644,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937250938.588, "dur": 53.977, + "args": { + "External id": 977645,"Record function id": 0, "Sequence number": 10552439, "Fwd thread id": 1, "Ev Idx": 236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937250939.710, "dur": 49.487, + "args": { + "External id": 977646,"Sequence number": 10552439, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 237 + } + }, + { + "ph": "f", "id": 27, "pid": 2338706, "tid": 2379422, "ts": 6345937250939.710, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937250941.937, "dur": 47.040, + "args": { + "External id": 977647,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345937250943.819, "dur": 24.003, + "args": { + "External id": 977648,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937250949.217, "dur": 2.971, + "args": { + "External id": 977649,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345937250955.131, "dur": 12.437, + "args": { + "External id": 977650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345937250956.568, "dur": 10.645, + "args": { + "External id": 977651,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2379422, + "ts": 6345937250969.119, "dur": 5.085, + "args": { + "External id": 977652,"Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937250972.505, "dur": 1.207, + "args": { + "External id": 977653,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937250974.974, "dur": 13.488, + "args": { + "External id": 977654,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937250996.425, "dur": 160.790, + "args": { + "External id": 977655,"Record function id": 0, "Sequence number": 10552438, "Fwd thread id": 1, "Ev Idx": 246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937250997.877, "dur": 154.050, + "args": { + "External id": 977656,"Sequence number": 10552438, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 247 + } + }, + { + "ph": "f", "id": 28, "pid": 2338706, "tid": 2379422, "ts": 6345937250997.877, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937250999.854, "dur": 151.703, + "args": { + "External id": 977657,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345937251003.120, "dur": 32.926, + "args": { + "External id": 977658,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937251004.191, "dur": 2.254, + "args": { + "External id": 977659,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345937251006.937, "dur": 28.845, + "args": { + "External id": 977660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345937251023.275, "dur": 11.971, + "args": { + "External id": 977661,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937251037.041, "dur": 2.564, + "args": { + "External id": 977662,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937251038.423, "dur": 0.890, + "args": { + "External id": 977663,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937251040.532, "dur": 109.396, + "args": { + "External id": 977664,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937251164.858, "dur": 127.124, + "args": { + "External id": 977665,"Record function id": 0, "Sequence number": 10552437, "Fwd thread id": 1, "Ev Idx": 256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937251166.269, "dur": 104.926, + "args": { + "External id": 977666,"Sequence number": 10552437, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 257 + } + }, + { + "ph": "f", "id": 29, "pid": 2338706, "tid": 2379422, "ts": 6345937251166.269, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937251168.278, "dur": 102.636, + "args": { + "External id": 977667,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345937251169.872, "dur": 41.976, + "args": { + "External id": 977668,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937251175.205, "dur": 3.105, + "args": { + "External id": 977669,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345937251195.397, "dur": 16.180, + "args": { + "External id": 977670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345937251199.085, "dur": 11.963, + "args": { + "External id": 977671,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937251215.122, "dur": 2.299, + "args": { + "External id": 977672,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937251216.438, "dur": 0.712, + "args": { + "External id": 977673,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937251218.381, "dur": 51.791, + "args": { + "External id": 977674,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345937251276.235, "dur": 13.361, + "args": { + "External id": 977675,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937251298.945, "dur": 370.341, + "args": { + "External id": 977676,"Record function id": 0, "Sequence number": 10552436, "Fwd thread id": 1, "Ev Idx": 267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937251300.730, "dur": 357.341, + "args": { + "External id": 977677,"Sequence number": 10552436, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 268 + } + }, + { + "ph": "f", "id": 30, "pid": 2338706, "tid": 2379422, "ts": 6345937251300.730, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937251489.106, "dur": 43.965, + "args": { + "External id": 977678,"kernel_hash": "cxv3u4whmia24aefmfuiol55m7osx2elddgy6rxbkbvmnigizet6", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "131072", "4096", "1", "993", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xv/cxv3u4whmia24aefmfuiol55m7osx2elddgy6rxbkbvmnigizet6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [4096], [131072, 4096], [131072, 4096], [132, 4096], [131072], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937251571.910, "dur": 26.787, + "args": { + "External id": 977679,"kernel_hash": "chd4lphpu733jd27rokzqqzh23vkeai2aitqgvmlc7xjqod3vr6m", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/hd/chd4lphpu733jd27rokzqqzh23vkeai2aitqgvmlc7xjqod3vr6m.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937251617.130, "dur": 19.576, + "args": { + "External id": 977680,"kernel_hash": "cginwarxhlqgizmvepo2jober6dgvr4lj6cd6nlltcu72kqzvxpx", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/gi/cginwarxhlqgizmvepo2jober6dgvr4lj6cd6nlltcu72kqzvxpx.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937251678.121, "dur": 17.184, + "args": { + "External id": 977681,"Record function id": 0, "Ev Idx": 272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937251680.416, "dur": 13.819, + "args": { + "External id": 977682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937251683.600, "dur": 9.724, + "args": { + "External id": 977683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937251687.185, "dur": 6.007, + "args": { + "External id": 977684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: StackBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937251699.364, "dur": 39.767, + "args": { + "External id": 977685,"Record function id": 0, "Sequence number": 10552435, "Fwd thread id": 1, "Ev Idx": 276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "StackBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937251700.696, "dur": 27.870, + "args": { + "External id": 977686,"Sequence number": 10552435, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 277 + } + }, + { + "ph": "f", "id": 31, "pid": 2338706, "tid": 2379422, "ts": 6345937251700.696, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2379422, + "ts": 6345937251703.034, "dur": 8.624, + "args": { + "External id": 977687,"Record function id": 0, "Concrete Inputs": ["", "-2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937251708.614, "dur": 1.267, + "args": { + "External id": 977688,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2379422, + "ts": 6345937251712.390, "dur": 7.736, + "args": { + "External id": 977689,"Record function id": 0, "Concrete Inputs": ["", "-2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937251718.750, "dur": 0.659, + "args": { + "External id": 977690,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2379422, + "ts": 6345937251720.750, "dur": 2.054, + "args": { + "External id": 977691,"Record function id": 0, "Concrete Inputs": ["", "-2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937251721.618, "dur": 0.607, + "args": { + "External id": 977692,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2379422, + "ts": 6345937251723.171, "dur": 4.743, + "args": { + "External id": 977693,"Record function id": 0, "Concrete Inputs": ["", "-2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937251726.741, "dur": 0.459, + "args": { + "External id": 977694,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937251742.974, "dur": 7.430, + "args": { + "External id": 977695,"Record function id": 0, "Sequence number": 10552434, "Fwd thread id": 1, "Ev Idx": 286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937251744.113, "dur": 1.110, + "args": { + "External id": 977696,"Sequence number": 10552434, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 287 + } + }, + { + "ph": "f", "id": 32, "pid": 2338706, "tid": 2379422, "ts": 6345937251744.113, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937251755.112, "dur": 636.059, + "args": { + "External id": 977697,"Record function id": 0, "Sequence number": 10552433, "Fwd thread id": 1, "Ev Idx": 288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937251756.575, "dur": 614.093, + "args": { + "External id": 977698,"Sequence number": 10552433, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 289 + } + }, + { + "ph": "f", "id": 33, "pid": 2338706, "tid": 2379422, "ts": 6345937251756.575, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937251801.268, "dur": 13.910, + "args": { + "External id": 977699,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937251809.901, "dur": 4.933, + "args": { + "External id": 977700,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937251821.014, "dur": 10.256, + "args": { + "External id": 977701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937251824.570, "dur": 5.509, + "args": { + "External id": 977702,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937251828.996, "dur": 0.728, + "args": { + "External id": 977703,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2379422, + "ts": 6345937251837.414, "dur": 131.209, + "args": { + "External id": 977704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937251838.704, "dur": 5.965, + "args": { + "External id": 977705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937251839.486, "dur": 4.410, + "args": { + "External id": 977706,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937251843.268, "dur": 0.510, + "args": { + "External id": 977707,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2379422, + "ts": 6345937251846.754, "dur": 121.173, + "args": { + "External id": 977708,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937251849.247, "dur": 117.673, + "args": { + "External id": 977709,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345937251977.271, "dur": 5.289, + "args": { + "External id": 977710,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937251979.402, "dur": 3.043, + "args": { + "External id": 977711,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937252045.926, "dur": 39.749, + "args": { + "External id": 977712,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937252088.353, "dur": 2.628, + "args": { + "External id": 977713,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937252094.420, "dur": 2.861, + "args": { + "External id": 977714,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937252144.957, "dur": 3.889, + "args": { + "External id": 977715,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937252145.933, "dur": 2.748, + "args": { + "External id": 977716,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338706, "tid": 2379422, + "ts": 6345937252179.583, "dur": 162.970, + "args": { + "External id": 977717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2379422, + "ts": 6345937252186.079, "dur": 11.862, + "args": { + "External id": 977718,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937252193.233, "dur": 1.331, + "args": { + "External id": 977719,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937252200.308, "dur": 7.479, + "args": { + "External id": 977720,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937252205.887, "dur": 0.724, + "args": { + "External id": 977721,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2379422, + "ts": 6345937252210.042, "dur": 7.284, + "args": { + "External id": 977722,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937252214.121, "dur": 2.875, + "args": { + "External id": 977723,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937252218.208, "dur": 4.376, + "args": { + "External id": 977724,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937252221.368, "dur": 0.645, + "args": { + "External id": 977725,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937252229.561, "dur": 4.600, + "args": { + "External id": 977726,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937252233.228, "dur": 0.656, + "args": { + "External id": 977727,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937252235.456, "dur": 8.367, + "args": { + "External id": 977728,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937252241.582, "dur": 2.013, + "args": { + "External id": 977729,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937252244.460, "dur": 3.764, + "args": { + "External id": 977730,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937252247.559, "dur": 0.362, + "args": { + "External id": 977731,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937252248.889, "dur": 2.482, + "args": { + "External id": 977732,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937252249.779, "dur": 1.489, + "args": { + "External id": 977733,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345937252253.415, "dur": 72.730, + "args": { + "External id": 977734,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937252327.990, "dur": 1.088, + "args": { + "External id": 977735,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937252332.802, "dur": 5.288, + "args": { + "External id": 977736,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937252334.045, "dur": 3.361, + "args": { + "External id": 977737,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937252340.203, "dur": 1.179, + "args": { + "External id": 977738,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937252404.977, "dur": 10.651, + "args": { + "External id": 977739,"Record function id": 0, "Ev Idx": 330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937252407.257, "dur": 7.411, + "args": { + "External id": 977740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937252410.037, "dur": 3.809, + "args": { + "External id": 977741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937252411.405, "dur": 2.283, + "args": { + "External id": 977742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937252419.810, "dur": 10.978, + "args": { + "External id": 977743,"Record function id": 0, "Sequence number": 10552432, "Fwd thread id": 1, "Ev Idx": 334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937252420.927, "dur": 7.012, + "args": { + "External id": 977744,"Sequence number": 10552432, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 335 + } + }, + { + "ph": "f", "id": 34, "pid": 2338706, "tid": 2379422, "ts": 6345937252420.927, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937252425.253, "dur": 2.436, + "args": { + "External id": 977745,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937252426.462, "dur": 1.113, + "args": { + "External id": 977746,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937252438.065, "dur": 148.780, + "args": { + "External id": 977747,"Record function id": 0, "Sequence number": 10552431, "Fwd thread id": 1, "Ev Idx": 338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937252439.197, "dur": 137.615, + "args": { + "External id": 977748,"Sequence number": 10552431, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 339 + } + }, + { + "ph": "f", "id": 35, "pid": 2338706, "tid": 2379422, "ts": 6345937252439.197, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937252445.492, "dur": 5.047, + "args": { + "External id": 977749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937252447.520, "dur": 2.449, + "args": { + "External id": 977750,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937252449.062, "dur": 0.733, + "args": { + "External id": 977751,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937252452.313, "dur": 58.044, + "args": { + "External id": 977752,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937252511.815, "dur": 7.080, + "args": { + "External id": 977753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937252515.622, "dur": 2.648, + "args": { + "External id": 977754,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937252516.846, "dur": 1.243, + "args": { + "External id": 977755,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937252521.089, "dur": 6.031, + "args": { + "External id": 977756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937252522.358, "dur": 4.260, + "args": { + "External id": 977757,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937252525.966, "dur": 0.576, + "args": { + "External id": 977758,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937252527.813, "dur": 48.184, + "args": { + "External id": 977759,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937252591.846, "dur": 14.100, + "args": { + "External id": 977760,"Record function id": 0, "Sequence number": 10552430, "Fwd thread id": 1, "Ev Idx": 351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937252593.400, "dur": 10.080, + "args": { + "External id": 977761,"Sequence number": 10552430, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 352 + } + }, + { + "ph": "f", "id": 36, "pid": 2338706, "tid": 2379422, "ts": 6345937252593.400, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937252598.946, "dur": 4.390, + "args": { + "External id": 977762,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937252599.689, "dur": 3.476, + "args": { + "External id": 977763,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937252609.770, "dur": 11.894, + "args": { + "External id": 977764,"Record function id": 0, "Sequence number": 10552429, "Fwd thread id": 1, "Ev Idx": 355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937252610.922, "dur": 7.314, + "args": { + "External id": 977765,"Sequence number": 10552429, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 356 + } + }, + { + "ph": "f", "id": 37, "pid": 2338706, "tid": 2379422, "ts": 6345937252610.922, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937252612.467, "dur": 5.547, + "args": { + "External id": 977766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937252613.071, "dur": 4.426, + "args": { + "External id": 977767,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937252616.776, "dur": 0.597, + "args": { + "External id": 977768,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937252625.609, "dur": 8.267, + "args": { + "External id": 977769,"Record function id": 0, "Ev Idx": 360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937252627.032, "dur": 6.317, + "args": { + "External id": 977770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937252628.002, "dur": 4.839, + "args": { + "External id": 977771,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937252631.202, "dur": 1.511, + "args": { + "External id": 977772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937252639.508, "dur": 7.632, + "args": { + "External id": 977773,"Record function id": 0, "Sequence number": 10552428, "Fwd thread id": 1, "Ev Idx": 364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937252640.724, "dur": 2.933, + "args": { + "External id": 977774,"Sequence number": 10552428, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 365 + } + }, + { + "ph": "f", "id": 38, "pid": 2338706, "tid": 2379422, "ts": 6345937252640.724, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937252641.965, "dur": 1.555, + "args": { + "External id": 977775,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937252642.693, "dur": 0.690, + "args": { + "External id": 977776,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937252650.261, "dur": 122.682, + "args": { + "External id": 977777,"Record function id": 0, "Sequence number": 10552427, "Fwd thread id": 1, "Ev Idx": 368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937252651.125, "dur": 110.793, + "args": { + "External id": 977778,"Sequence number": 10552427, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 369 + } + }, + { + "ph": "f", "id": 39, "pid": 2338706, "tid": 2379422, "ts": 6345937252651.125, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937252654.359, "dur": 5.019, + "args": { + "External id": 977779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937252654.975, "dur": 3.917, + "args": { + "External id": 977780,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937252658.248, "dur": 0.505, + "args": { + "External id": 977781,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937252660.160, "dur": 43.056, + "args": { + "External id": 977782,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937252704.345, "dur": 6.140, + "args": { + "External id": 977783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937252705.555, "dur": 4.425, + "args": { + "External id": 977784,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937252709.109, "dur": 0.748, + "args": { + "External id": 977785,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937252711.870, "dur": 5.326, + "args": { + "External id": 977786,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937252715.006, "dur": 1.518, + "args": { + "External id": 977787,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937252715.727, "dur": 0.708, + "args": { + "External id": 977788,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937252717.787, "dur": 43.391, + "args": { + "External id": 977789,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937252777.299, "dur": 35.870, + "args": { + "External id": 977790,"Record function id": 0, "Sequence number": 10552426, "Fwd thread id": 1, "Ev Idx": 381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937252778.153, "dur": 6.115, + "args": { + "External id": 977791,"Sequence number": 10552426, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 382 + } + }, + { + "ph": "f", "id": 40, "pid": 2338706, "tid": 2379422, "ts": 6345937252778.153, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937252779.600, "dur": 4.515, + "args": { + "External id": 977792,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937252782.789, "dur": 1.172, + "args": { + "External id": 977793,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345937252788.400, "dur": 21.438, + "args": { + "External id": 977794,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937252817.275, "dur": 10.562, + "args": { + "External id": 977795,"Record function id": 0, "Sequence number": 10552425, "Fwd thread id": 1, "Ev Idx": 386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937252821.236, "dur": 4.208, + "args": { + "External id": 977796,"Sequence number": 10552425, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 387 + } + }, + { + "ph": "f", "id": 41, "pid": 2338706, "tid": 2379422, "ts": 6345937252821.236, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937252822.409, "dur": 2.847, + "args": { + "External id": 977797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937252822.945, "dur": 1.839, + "args": { + "External id": 977798,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937252824.124, "dur": 0.544, + "args": { + "External id": 977799,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937252831.700, "dur": 5.185, + "args": { + "External id": 977800,"Record function id": 0, "Ev Idx": 391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937252833.024, "dur": 3.340, + "args": { + "External id": 977801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937252834.223, "dur": 1.793, + "args": { + "External id": 977802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937252834.815, "dur": 1.074, + "args": { + "External id": 977803,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937252841.197, "dur": 536.462, + "args": { + "External id": 977804,"Record function id": 0, "Sequence number": 10552424, "Fwd thread id": 1, "Ev Idx": 395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937252842.760, "dur": 516.494, + "args": { + "External id": 977805,"Sequence number": 10552424, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 396 + } + }, + { + "ph": "f", "id": 42, "pid": 2338706, "tid": 2379422, "ts": 6345937252842.760, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2379422, + "ts": 6345937252872.742, "dur": 34.928, + "args": { + "External id": 977806,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345937252874.950, "dur": 32.520, + "args": { + "External id": 977807,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937252878.273, "dur": 6.143, + "args": { + "External id": 977808,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937252880.936, "dur": 2.806, + "args": { + "External id": 977809,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937252885.896, "dur": 21.065, + "args": { + "External id": 977810,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937252923.159, "dur": 2.133, + "args": { + "External id": 977811,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937252924.021, "dur": 1.131, + "args": { + "External id": 977812,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937252929.560, "dur": 1.470, + "args": { + "External id": 977813,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937252930.249, "dur": 0.689, + "args": { + "External id": 977814,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937252950.080, "dur": 4.740, + "args": { + "External id": 977815,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937252967.280, "dur": 2.361, + "args": { + "External id": 977816,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937253230.109, "dur": 4.721, + "args": { + "External id": 977817,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937253240.019, "dur": 42.410, + "args": { + "External id": 977818,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937253256.173, "dur": 1.133, + "args": { + "External id": 977819,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937253289.743, "dur": 31.805, + "args": { + "External id": 977820,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937253291.536, "dur": 29.747, + "args": { + "External id": 977821,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937253297.937, "dur": 3.995, + "args": { + "External id": 977822,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937253303.398, "dur": 17.193, + "args": { + "External id": 977823,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345937253326.742, "dur": 2.783, + "args": { + "External id": 977824,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937253328.131, "dur": 1.242, + "args": { + "External id": 977825,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937253337.149, "dur": 5.145, + "args": { + "External id": 977826,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937253340.878, "dur": 1.287, + "args": { + "External id": 977827,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937253344.597, "dur": 4.469, + "args": { + "External id": 977828,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937253348.005, "dur": 0.978, + "args": { + "External id": 977829,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937253392.080, "dur": 9.966, + "args": { + "External id": 977830,"Record function id": 0, "Ev Idx": 421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937253394.515, "dur": 6.647, + "args": { + "External id": 977831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937253397.215, "dur": 2.921, + "args": { + "External id": 977832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937253398.347, "dur": 1.686, + "args": { + "External id": 977833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937253405.616, "dur": 13.033, + "args": { + "External id": 977834,"Record function id": 0, "Sequence number": 10552423, "Fwd thread id": 1, "Ev Idx": 425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937253406.706, "dur": 8.268, + "args": { + "External id": 977835,"Sequence number": 10552423, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 426 + } + }, + { + "ph": "f", "id": 43, "pid": 2338706, "tid": 2379422, "ts": 6345937253406.706, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937253410.725, "dur": 4.011, + "args": { + "External id": 977836,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937253413.515, "dur": 1.107, + "args": { + "External id": 977837,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937253422.125, "dur": 167.999, + "args": { + "External id": 977838,"Record function id": 0, "Sequence number": 10552422, "Fwd thread id": 1, "Ev Idx": 429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937253423.121, "dur": 160.353, + "args": { + "External id": 977839,"Sequence number": 10552422, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 430 + } + }, + { + "ph": "f", "id": 44, "pid": 2338706, "tid": 2379422, "ts": 6345937253423.121, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937253427.357, "dur": 5.399, + "args": { + "External id": 977840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937253429.272, "dur": 2.840, + "args": { + "External id": 977841,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937253430.952, "dur": 0.935, + "args": { + "External id": 977842,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937253434.079, "dur": 82.226, + "args": { + "External id": 977843,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937253517.246, "dur": 7.730, + "args": { + "External id": 977844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937253519.931, "dur": 4.345, + "args": { + "External id": 977845,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937253521.290, "dur": 2.775, + "args": { + "External id": 977846,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937253527.017, "dur": 5.234, + "args": { + "External id": 977847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937253527.965, "dur": 3.855, + "args": { + "External id": 977848,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937253531.252, "dur": 0.506, + "args": { + "External id": 977849,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937253533.160, "dur": 49.385, + "args": { + "External id": 977850,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937253595.098, "dur": 7.442, + "args": { + "External id": 977851,"Record function id": 0, "Sequence number": 10552421, "Fwd thread id": 1, "Ev Idx": 442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937253596.213, "dur": 3.828, + "args": { + "External id": 977852,"Sequence number": 10552421, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 443 + } + }, + { + "ph": "f", "id": 45, "pid": 2338706, "tid": 2379422, "ts": 6345937253596.213, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937253597.933, "dur": 1.946, + "args": { + "External id": 977853,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937253598.739, "dur": 1.003, + "args": { + "External id": 977854,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937253606.272, "dur": 12.088, + "args": { + "External id": 977855,"Record function id": 0, "Sequence number": 10552420, "Fwd thread id": 1, "Ev Idx": 446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937253607.042, "dur": 8.448, + "args": { + "External id": 977856,"Sequence number": 10552420, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 447 + } + }, + { + "ph": "f", "id": 46, "pid": 2338706, "tid": 2379422, "ts": 6345937253607.042, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937253610.444, "dur": 4.812, + "args": { + "External id": 977857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937253610.963, "dur": 3.790, + "args": { + "External id": 977858,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937253614.029, "dur": 0.598, + "args": { + "External id": 977859,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937253622.212, "dur": 5.443, + "args": { + "External id": 977860,"Record function id": 0, "Ev Idx": 451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937253623.471, "dur": 3.645, + "args": { + "External id": 977861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937253624.775, "dur": 2.029, + "args": { + "External id": 977862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937253625.598, "dur": 1.110, + "args": { + "External id": 977863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937253630.802, "dur": 7.987, + "args": { + "External id": 977864,"Record function id": 0, "Sequence number": 10552419, "Fwd thread id": 1, "Ev Idx": 455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937253631.932, "dur": 3.933, + "args": { + "External id": 977865,"Sequence number": 10552419, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 456 + } + }, + { + "ph": "f", "id": 47, "pid": 2338706, "tid": 2379422, "ts": 6345937253631.932, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937253633.506, "dur": 2.227, + "args": { + "External id": 977866,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937253634.517, "dur": 1.065, + "args": { + "External id": 977867,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937253646.403, "dur": 489.253, + "args": { + "External id": 977868,"Record function id": 0, "Sequence number": 10552418, "Fwd thread id": 1, "Ev Idx": 459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937253647.497, "dur": 459.497, + "args": { + "External id": 977869,"Sequence number": 10552418, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 460 + } + }, + { + "ph": "f", "id": 48, "pid": 2338706, "tid": 2379422, "ts": 6345937253647.497, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937253669.844, "dur": 14.354, + "args": { + "External id": 977870,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937253676.446, "dur": 7.182, + "args": { + "External id": 977871,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937253686.462, "dur": 4.152, + "args": { + "External id": 977872,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937253687.739, "dur": 2.656, + "args": { + "External id": 977873,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937253692.004, "dur": 6.189, + "args": { + "External id": 977874,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937253695.605, "dur": 2.371, + "args": { + "External id": 977875,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937253743.798, "dur": 296.780, + "args": { + "External id": 977876,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937253840.492, "dur": 5.399, + "args": { + "External id": 977877,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937253847.632, "dur": 2.599, + "args": { + "External id": 977878,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937253853.676, "dur": 2.259, + "args": { + "External id": 977879,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937253857.025, "dur": 2.298, + "args": { + "External id": 977880,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937253915.136, "dur": 5.210, + "args": { + "External id": 977881,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937253916.300, "dur": 3.910, + "args": { + "External id": 977882,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937253924.629, "dur": 27.039, + "args": { + "External id": 977883,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937253929.209, "dur": 3.229, + "args": { + "External id": 977884,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937253952.856, "dur": 1.348, + "args": { + "External id": 977885,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937253953.489, "dur": 0.659, + "args": { + "External id": 977886,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937253957.313, "dur": 14.783, + "args": { + "External id": 977887,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937253960.730, "dur": 0.786, + "args": { + "External id": 977888,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937254090.598, "dur": 4.973, + "args": { + "External id": 977889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937254100.067, "dur": 0.880, + "args": { + "External id": 977890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937254103.182, "dur": 0.722, + "args": { + "External id": 977891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937254148.087, "dur": 268.394, + "args": { + "External id": 977892,"Record function id": 0, "Sequence number": 10552417, "Fwd thread id": 1, "Ev Idx": 483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937254150.053, "dur": 258.084, + "args": { + "External id": 977893,"Sequence number": 10552417, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 484 + } + }, + { + "ph": "f", "id": 49, "pid": 2338706, "tid": 2379422, "ts": 6345937254150.053, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345937254178.790, "dur": 52.660, + "args": { + "External id": 977894,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937254182.111, "dur": 4.827, + "args": { + "External id": 977895,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937254188.432, "dur": 42.229, + "args": { + "External id": 977896,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937254243.545, "dur": 7.760, + "args": { + "External id": 977897,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937254247.979, "dur": 3.017, + "args": { + "External id": 977898,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937254424.098, "dur": 186.703, + "args": { + "External id": 977899,"Record function id": 0, "Sequence number": 10552416, "Fwd thread id": 1, "Ev Idx": 490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937254425.808, "dur": 177.373, + "args": { + "External id": 977900,"Sequence number": 10552416, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 491 + } + }, + { + "ph": "f", "id": 50, "pid": 2338706, "tid": 2379422, "ts": 6345937254425.808, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345937254438.644, "dur": 51.190, + "args": { + "External id": 977901,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937254440.812, "dur": 3.108, + "args": { + "External id": 977902,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937254444.998, "dur": 44.272, + "args": { + "External id": 977903,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937254497.309, "dur": 7.760, + "args": { + "External id": 977904,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937254501.471, "dur": 3.203, + "args": { + "External id": 977905,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254617.702, "dur": 16.058, + "args": { + "External id": 977906,"Record function id": 0, "Sequence number": 10552415, "Fwd thread id": 1, "Ev Idx": 497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254619.778, "dur": 10.436, + "args": { + "External id": 977907,"Sequence number": 10552415, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 498 + } + }, + { + "ph": "f", "id": 51, "pid": 2338706, "tid": 2379422, "ts": 6345937254619.778, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937254622.307, "dur": 7.642, + "args": { + "External id": 977908,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937254623.760, "dur": 6.031, + "args": { + "External id": 977909,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254637.308, "dur": 10.802, + "args": { + "External id": 977910,"Record function id": 0, "Sequence number": 10552414, "Fwd thread id": 1, "Ev Idx": 501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254638.520, "dur": 6.364, + "args": { + "External id": 977911,"Sequence number": 10552414, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 502 + } + }, + { + "ph": "f", "id": 52, "pid": 2338706, "tid": 2379422, "ts": 6345937254638.520, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937254639.845, "dur": 4.896, + "args": { + "External id": 977912,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937254643.611, "dur": 0.971, + "args": { + "External id": 977913,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254651.467, "dur": 5.804, + "args": { + "External id": 977914,"Record function id": 0, "Sequence number": 10552413, "Fwd thread id": 1, "Ev Idx": 505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254652.376, "dur": 3.345, + "args": { + "External id": 977915,"Sequence number": 10552413, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 506 + } + }, + { + "ph": "f", "id": 53, "pid": 2338706, "tid": 2379422, "ts": 6345937254652.376, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937254654.042, "dur": 1.555, + "args": { + "External id": 977916,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937254654.612, "dur": 0.898, + "args": { + "External id": 977917,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254660.734, "dur": 103.647, + "args": { + "External id": 977918,"Record function id": 0, "Sequence number": 10552412, "Fwd thread id": 1, "Ev Idx": 509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254661.745, "dur": 99.100, + "args": { + "External id": 977919,"Sequence number": 10552412, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 510 + } + }, + { + "ph": "f", "id": 54, "pid": 2338706, "tid": 2379422, "ts": 6345937254661.745, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937254665.060, "dur": 95.640, + "args": { + "External id": 977920,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937254759.202, "dur": 1.239, + "args": { + "External id": 977921,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254768.298, "dur": 163.669, + "args": { + "External id": 977922,"Record function id": 0, "Sequence number": 10552411, "Fwd thread id": 1, "Ev Idx": 513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254769.295, "dur": 153.673, + "args": { + "External id": 977923,"Sequence number": 10552411, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 514 + } + }, + { + "ph": "f", "id": 55, "pid": 2338706, "tid": 2379422, "ts": 6345937254769.295, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937254773.845, "dur": 6.386, + "args": { + "External id": 977924,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937254776.085, "dur": 3.417, + "args": { + "External id": 977925,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937254777.770, "dur": 1.471, + "args": { + "External id": 977926,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937254784.714, "dur": 71.465, + "args": { + "External id": 977927,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937254857.758, "dur": 5.783, + "args": { + "External id": 977928,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937254858.627, "dur": 4.184, + "args": { + "External id": 977929,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937254859.707, "dur": 2.922, + "args": { + "External id": 977930,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937254865.794, "dur": 8.384, + "args": { + "External id": 977931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937254866.983, "dur": 6.771, + "args": { + "External id": 977932,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937254873.163, "dur": 0.461, + "args": { + "External id": 977933,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937254874.987, "dur": 47.214, + "args": { + "External id": 977934,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254937.770, "dur": 7.053, + "args": { + "External id": 977935,"Record function id": 0, "Sequence number": 10552410, "Fwd thread id": 1, "Ev Idx": 526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254938.808, "dur": 3.980, + "args": { + "External id": 977936,"Sequence number": 10552410, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 527 + } + }, + { + "ph": "f", "id": 56, "pid": 2338706, "tid": 2379422, "ts": 6345937254938.808, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937254940.829, "dur": 1.832, + "args": { + "External id": 977937,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937254941.344, "dur": 1.172, + "args": { + "External id": 977938,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254948.010, "dur": 12.548, + "args": { + "External id": 977939,"Record function id": 0, "Sequence number": 10552409, "Fwd thread id": 1, "Ev Idx": 530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254948.896, "dur": 8.752, + "args": { + "External id": 977940,"Sequence number": 10552409, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 531 + } + }, + { + "ph": "f", "id": 57, "pid": 2338706, "tid": 2379422, "ts": 6345937254948.896, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937254950.103, "dur": 7.319, + "args": { + "External id": 977941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937254953.375, "dur": 3.588, + "args": { + "External id": 977942,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937254956.404, "dur": 0.423, + "args": { + "External id": 977943,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937254966.296, "dur": 9.454, + "args": { + "External id": 977944,"Record function id": 0, "Ev Idx": 535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937254967.961, "dur": 7.027, + "args": { + "External id": 977945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937254970.623, "dur": 4.011, + "args": { + "External id": 977946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937254971.934, "dur": 2.590, + "args": { + "External id": 977947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254981.316, "dur": 6.566, + "args": { + "External id": 977948,"Record function id": 0, "Sequence number": 10552408, "Fwd thread id": 1, "Ev Idx": 539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254982.430, "dur": 2.857, + "args": { + "External id": 977949,"Sequence number": 10552408, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 540 + } + }, + { + "ph": "f", "id": 58, "pid": 2338706, "tid": 2379422, "ts": 6345937254982.430, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937254983.743, "dur": 1.387, + "args": { + "External id": 977950,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937254984.294, "dur": 0.688, + "args": { + "External id": 977951,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254991.144, "dur": 163.659, + "args": { + "External id": 977952,"Record function id": 0, "Sequence number": 10552407, "Fwd thread id": 1, "Ev Idx": 543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937254994.708, "dur": 150.458, + "args": { + "External id": 977953,"Sequence number": 10552407, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 544 + } + }, + { + "ph": "f", "id": 59, "pid": 2338706, "tid": 2379422, "ts": 6345937254994.708, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937254997.737, "dur": 2.242, + "args": { + "External id": 977954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937254998.179, "dur": 1.368, + "args": { + "External id": 977955,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937254998.828, "dur": 0.563, + "args": { + "External id": 977956,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937255000.628, "dur": 48.924, + "args": { + "External id": 977957,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937255051.777, "dur": 42.108, + "args": { + "External id": 977958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937255085.447, "dur": 7.398, + "args": { + "External id": 977959,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937255091.400, "dur": 1.166, + "args": { + "External id": 977960,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937255095.448, "dur": 4.855, + "args": { + "External id": 977961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937255096.310, "dur": 3.543, + "args": { + "External id": 977962,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937255097.017, "dur": 2.771, + "args": { + "External id": 977963,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937255101.046, "dur": 43.382, + "args": { + "External id": 977964,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937255161.398, "dur": 35.839, + "args": { + "External id": 977965,"Record function id": 0, "Sequence number": 10552406, "Fwd thread id": 1, "Ev Idx": 556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937255162.667, "dur": 6.676, + "args": { + "External id": 977966,"Sequence number": 10552406, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 557 + } + }, + { + "ph": "f", "id": 60, "pid": 2338706, "tid": 2379422, "ts": 6345937255162.667, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937255164.422, "dur": 4.775, + "args": { + "External id": 977967,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937255167.700, "dur": 1.361, + "args": { + "External id": 977968,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345937255172.325, "dur": 22.273, + "args": { + "External id": 977969,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937255204.287, "dur": 7.346, + "args": { + "External id": 977970,"Record function id": 0, "Sequence number": 10552405, "Fwd thread id": 1, "Ev Idx": 561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937255205.420, "dur": 4.251, + "args": { + "External id": 977971,"Sequence number": 10552405, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 562 + } + }, + { + "ph": "f", "id": 61, "pid": 2338706, "tid": 2379422, "ts": 6345937255205.420, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937255206.314, "dur": 3.147, + "args": { + "External id": 977972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937255207.125, "dur": 1.778, + "args": { + "External id": 977973,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937255208.355, "dur": 0.423, + "args": { + "External id": 977974,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937255215.950, "dur": 5.575, + "args": { + "External id": 977975,"Record function id": 0, "Ev Idx": 566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937255217.155, "dur": 3.842, + "args": { + "External id": 977976,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937255218.538, "dur": 1.934, + "args": { + "External id": 977977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937255219.134, "dur": 1.224, + "args": { + "External id": 977978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937255224.910, "dur": 12.411, + "args": { + "External id": 977979,"Record function id": 0, "Sequence number": 10552404, "Fwd thread id": 1, "Ev Idx": 570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937255226.222, "dur": 7.648, + "args": { + "External id": 977980,"Sequence number": 10552404, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 571 + } + }, + { + "ph": "f", "id": 62, "pid": 2338706, "tid": 2379422, "ts": 6345937255226.222, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937255229.749, "dur": 3.968, + "args": { + "External id": 977981,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937255232.731, "dur": 0.843, + "args": { + "External id": 977982,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937255240.482, "dur": 114.429, + "args": { + "External id": 977983,"Record function id": 0, "Sequence number": 10552403, "Fwd thread id": 1, "Ev Idx": 574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937255241.524, "dur": 100.019, + "args": { + "External id": 977984,"Sequence number": 10552403, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 575 + } + }, + { + "ph": "f", "id": 63, "pid": 2338706, "tid": 2379422, "ts": 6345937255241.524, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937255245.829, "dur": 2.187, + "args": { + "External id": 977985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937255246.254, "dur": 1.333, + "args": { + "External id": 977986,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937255246.881, "dur": 0.595, + "args": { + "External id": 977987,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937255255.377, "dur": 38.279, + "args": { + "External id": 977988,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937255294.731, "dur": 4.914, + "args": { + "External id": 977989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937255295.170, "dur": 3.928, + "args": { + "External id": 977990,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937255296.154, "dur": 2.784, + "args": { + "External id": 977991,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937255300.927, "dur": 5.276, + "args": { + "External id": 977992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937255301.991, "dur": 3.621, + "args": { + "External id": 977993,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937255305.050, "dur": 0.435, + "args": { + "External id": 977994,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937255308.937, "dur": 32.080, + "args": { + "External id": 977995,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937255359.497, "dur": 23.826, + "args": { + "External id": 977996,"Record function id": 0, "Sequence number": 10552402, "Fwd thread id": 1, "Ev Idx": 587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937255360.624, "dur": 3.476, + "args": { + "External id": 977997,"Sequence number": 10552402, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 588 + } + }, + { + "ph": "f", "id": 64, "pid": 2338706, "tid": 2379422, "ts": 6345937255360.624, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937255362.007, "dur": 1.943, + "args": { + "External id": 977998,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937255362.703, "dur": 1.087, + "args": { + "External id": 977999,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345937255366.543, "dur": 14.727, + "args": { + "External id": 978000,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937255387.224, "dur": 10.933, + "args": { + "External id": 978001,"Record function id": 0, "Sequence number": 10552401, "Fwd thread id": 1, "Ev Idx": 592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937255388.051, "dur": 6.974, + "args": { + "External id": 978002,"Sequence number": 10552401, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 593 + } + }, + { + "ph": "f", "id": 65, "pid": 2338706, "tid": 2379422, "ts": 6345937255388.051, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937255389.221, "dur": 5.625, + "args": { + "External id": 978003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937255390.111, "dur": 4.256, + "args": { + "External id": 978004,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937255393.884, "dur": 0.364, + "args": { + "External id": 978005,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937255402.138, "dur": 7.771, + "args": { + "External id": 978006,"Record function id": 0, "Ev Idx": 597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937255403.538, "dur": 5.875, + "args": { + "External id": 978007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937255404.478, "dur": 4.459, + "args": { + "External id": 978008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937255407.317, "dur": 1.441, + "args": { + "External id": 978009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937255414.725, "dur": 391.758, + "args": { + "External id": 978010,"Record function id": 0, "Sequence number": 10552400, "Fwd thread id": 1, "Ev Idx": 601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937255415.711, "dur": 355.828, + "args": { + "External id": 978011,"Sequence number": 10552400, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 602 + } + }, + { + "ph": "f", "id": 66, "pid": 2338706, "tid": 2379422, "ts": 6345937255415.711, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937255454.805, "dur": 2.014, + "args": { + "External id": 978012,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937255455.461, "dur": 1.248, + "args": { + "External id": 978013,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937255471.811, "dur": 6.256, + "args": { + "External id": 978014,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937255490.371, "dur": 1.932, + "args": { + "External id": 978015,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937255664.517, "dur": 1.849, + "args": { + "External id": 978016,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937255670.539, "dur": 35.391, + "args": { + "External id": 978017,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937255680.705, "dur": 0.800, + "args": { + "External id": 978018,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937255711.801, "dur": 36.314, + "args": { + "External id": 978019,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937255713.683, "dur": 34.222, + "args": { + "External id": 978020,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937255720.207, "dur": 6.718, + "args": { + "External id": 978021,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937255728.465, "dur": 18.825, + "args": { + "External id": 978022,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345937255754.525, "dur": 2.560, + "args": { + "External id": 978023,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937255755.854, "dur": 1.085, + "args": { + "External id": 978024,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937255762.669, "dur": 1.825, + "args": { + "External id": 978025,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937255763.321, "dur": 1.073, + "args": { + "External id": 978026,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345937255783.607, "dur": 16.430, + "args": { + "External id": 978027,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937255815.454, "dur": 10.173, + "args": { + "External id": 978028,"Record function id": 0, "Ev Idx": 619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937255819.238, "dur": 5.716, + "args": { + "External id": 978029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937255820.872, "dur": 2.850, + "args": { + "External id": 978030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937255821.730, "dur": 1.833, + "args": { + "External id": 978031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937255831.842, "dur": 7.617, + "args": { + "External id": 978032,"Record function id": 0, "Sequence number": 10552399, "Fwd thread id": 1, "Ev Idx": 623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937255833.503, "dur": 1.169, + "args": { + "External id": 978033,"Sequence number": 10552399, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 624 + } + }, + { + "ph": "f", "id": 67, "pid": 2338706, "tid": 2379422, "ts": 6345937255833.503, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937255843.556, "dur": 498.364, + "args": { + "External id": 978034,"Record function id": 0, "Sequence number": 10552398, "Fwd thread id": 1, "Ev Idx": 625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937255844.729, "dur": 482.301, + "args": { + "External id": 978035,"Sequence number": 10552398, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 626 + } + }, + { + "ph": "f", "id": 68, "pid": 2338706, "tid": 2379422, "ts": 6345937255844.729, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937255876.374, "dur": 10.668, + "args": { + "External id": 978036,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937255883.318, "dur": 3.446, + "args": { + "External id": 978037,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937255890.127, "dur": 8.654, + "args": { + "External id": 978038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937255891.687, "dur": 6.501, + "args": { + "External id": 978039,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937255895.239, "dur": 2.786, + "args": { + "External id": 978040,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2379422, + "ts": 6345937255902.359, "dur": 82.322, + "args": { + "External id": 978041,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937255903.132, "dur": 5.698, + "args": { + "External id": 978042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937255903.934, "dur": 4.403, + "args": { + "External id": 978043,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937255907.542, "dur": 0.671, + "args": { + "External id": 978044,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2379422, + "ts": 6345937255909.926, "dur": 74.357, + "args": { + "External id": 978045,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937255911.483, "dur": 71.963, + "args": { + "External id": 978046,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345937255988.497, "dur": 2.599, + "args": { + "External id": 978047,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937255989.616, "dur": 1.376, + "args": { + "External id": 978048,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937256046.624, "dur": 40.758, + "args": { + "External id": 978049,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937256095.080, "dur": 2.339, + "args": { + "External id": 978050,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937256098.296, "dur": 1.947, + "args": { + "External id": 978051,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937256141.232, "dur": 2.649, + "args": { + "External id": 978052,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937256141.938, "dur": 1.762, + "args": { + "External id": 978053,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338706, "tid": 2379422, + "ts": 6345937256167.524, "dur": 139.560, + "args": { + "External id": 978054,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2379422, + "ts": 6345937256172.739, "dur": 9.348, + "args": { + "External id": 978055,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937256178.324, "dur": 2.903, + "args": { + "External id": 978056,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937256183.572, "dur": 8.517, + "args": { + "External id": 978057,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937256190.592, "dur": 0.549, + "args": { + "External id": 978058,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2379422, + "ts": 6345937256196.265, "dur": 1.272, + "args": { + "External id": 978059,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937256196.912, "dur": 0.306, + "args": { + "External id": 978060,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937256200.331, "dur": 2.166, + "args": { + "External id": 978061,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937256201.410, "dur": 0.606, + "args": { + "External id": 978062,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937256208.230, "dur": 1.599, + "args": { + "External id": 978063,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937256209.145, "dur": 0.402, + "args": { + "External id": 978064,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937256210.454, "dur": 7.749, + "args": { + "External id": 978065,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937256215.953, "dur": 2.021, + "args": { + "External id": 978066,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937256218.754, "dur": 4.185, + "args": { + "External id": 978067,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937256222.192, "dur": 0.484, + "args": { + "External id": 978068,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937256223.902, "dur": 6.374, + "args": { + "External id": 978069,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937256226.670, "dur": 3.508, + "args": { + "External id": 978070,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345937256231.747, "dur": 59.907, + "args": { + "External id": 978071,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937256295.969, "dur": 1.763, + "args": { + "External id": 978072,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937256298.387, "dur": 4.388, + "args": { + "External id": 978073,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937256301.699, "dur": 0.513, + "args": { + "External id": 978074,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937256305.292, "dur": 0.842, + "args": { + "External id": 978075,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937256353.670, "dur": 9.922, + "args": { + "External id": 978076,"Record function id": 0, "Ev Idx": 667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937256355.867, "dur": 6.906, + "args": { + "External id": 978077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937256358.046, "dur": 3.841, + "args": { + "External id": 978078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937256359.095, "dur": 2.668, + "args": { + "External id": 978079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937256367.307, "dur": 9.525, + "args": { + "External id": 978080,"Record function id": 0, "Sequence number": 10552397, "Fwd thread id": 1, "Ev Idx": 671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937256368.695, "dur": 5.841, + "args": { + "External id": 978081,"Sequence number": 10552397, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 672 + } + }, + { + "ph": "f", "id": 69, "pid": 2338706, "tid": 2379422, "ts": 6345937256368.695, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937256370.676, "dur": 3.648, + "args": { + "External id": 978082,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937256373.386, "dur": 0.790, + "args": { + "External id": 978083,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937256380.190, "dur": 137.049, + "args": { + "External id": 978084,"Record function id": 0, "Sequence number": 10552396, "Fwd thread id": 1, "Ev Idx": 675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937256381.134, "dur": 129.120, + "args": { + "External id": 978085,"Sequence number": 10552396, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 676 + } + }, + { + "ph": "f", "id": 70, "pid": 2338706, "tid": 2379422, "ts": 6345937256381.134, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937256385.841, "dur": 3.814, + "args": { + "External id": 978086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937256387.126, "dur": 1.999, + "args": { + "External id": 978087,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937256388.376, "dur": 0.492, + "args": { + "External id": 978088,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937256390.984, "dur": 42.522, + "args": { + "External id": 978089,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937256436.602, "dur": 6.130, + "args": { + "External id": 978090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937256437.378, "dur": 4.449, + "args": { + "External id": 978091,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937256440.858, "dur": 0.796, + "args": { + "External id": 978092,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937256444.814, "dur": 23.075, + "args": { + "External id": 978093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937256463.041, "dur": 4.293, + "args": { + "External id": 978094,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937256464.058, "dur": 3.173, + "args": { + "External id": 978095,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937256470.792, "dur": 38.380, + "args": { + "External id": 978096,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937256521.947, "dur": 7.974, + "args": { + "External id": 978097,"Record function id": 0, "Sequence number": 10552395, "Fwd thread id": 1, "Ev Idx": 688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937256522.881, "dur": 5.807, + "args": { + "External id": 978098,"Sequence number": 10552395, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 689 + } + }, + { + "ph": "f", "id": 71, "pid": 2338706, "tid": 2379422, "ts": 6345937256522.881, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937256524.686, "dur": 3.844, + "args": { + "External id": 978099,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937256527.454, "dur": 0.915, + "args": { + "External id": 978100,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937256533.424, "dur": 9.112, + "args": { + "External id": 978101,"Record function id": 0, "Sequence number": 10552394, "Fwd thread id": 1, "Ev Idx": 692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937256534.337, "dur": 5.996, + "args": { + "External id": 978102,"Sequence number": 10552394, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 693 + } + }, + { + "ph": "f", "id": 72, "pid": 2338706, "tid": 2379422, "ts": 6345937256534.337, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937256535.147, "dur": 4.971, + "args": { + "External id": 978103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937256535.914, "dur": 3.757, + "args": { + "External id": 978104,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937256538.916, "dur": 0.654, + "args": { + "External id": 978105,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937256546.354, "dur": 5.131, + "args": { + "External id": 978106,"Record function id": 0, "Ev Idx": 697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937256547.664, "dur": 3.295, + "args": { + "External id": 978107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937256548.654, "dur": 1.862, + "args": { + "External id": 978108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937256549.057, "dur": 1.361, + "args": { + "External id": 978109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937256554.619, "dur": 9.462, + "args": { + "External id": 978110,"Record function id": 0, "Sequence number": 10552393, "Fwd thread id": 1, "Ev Idx": 701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937256555.719, "dur": 5.654, + "args": { + "External id": 978111,"Sequence number": 10552393, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 702 + } + }, + { + "ph": "f", "id": 73, "pid": 2338706, "tid": 2379422, "ts": 6345937256555.719, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937256557.254, "dur": 3.962, + "args": { + "External id": 978112,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937256560.315, "dur": 0.781, + "args": { + "External id": 978113,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937256567.211, "dur": 99.917, + "args": { + "External id": 978114,"Record function id": 0, "Sequence number": 10552392, "Fwd thread id": 1, "Ev Idx": 705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937256568.302, "dur": 90.218, + "args": { + "External id": 978115,"Sequence number": 10552392, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 706 + } + }, + { + "ph": "f", "id": 74, "pid": 2338706, "tid": 2379422, "ts": 6345937256568.302, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937256574.374, "dur": 2.748, + "args": { + "External id": 978116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937256575.147, "dur": 1.570, + "args": { + "External id": 978117,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937256576.010, "dur": 0.605, + "args": { + "External id": 978118,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937256577.872, "dur": 33.733, + "args": { + "External id": 978119,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937256612.767, "dur": 7.942, + "args": { + "External id": 978120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937256613.401, "dur": 6.752, + "args": { + "External id": 978121,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937256616.909, "dur": 3.132, + "args": { + "External id": 978122,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937256622.097, "dur": 5.266, + "args": { + "External id": 978123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937256622.828, "dur": 3.947, + "args": { + "External id": 978124,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937256626.223, "dur": 0.472, + "args": { + "External id": 978125,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937256627.821, "dur": 30.078, + "args": { + "External id": 978126,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937256671.373, "dur": 29.647, + "args": { + "External id": 978127,"Record function id": 0, "Sequence number": 10552391, "Fwd thread id": 1, "Ev Idx": 718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937256672.468, "dur": 3.463, + "args": { + "External id": 978128,"Sequence number": 10552391, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 719 + } + }, + { + "ph": "f", "id": 75, "pid": 2338706, "tid": 2379422, "ts": 6345937256672.468, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937256674.141, "dur": 1.647, + "args": { + "External id": 978129,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937256674.759, "dur": 0.901, + "args": { + "External id": 978130,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345937256678.655, "dur": 19.697, + "args": { + "External id": 978131,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937256704.428, "dur": 9.971, + "args": { + "External id": 978132,"Record function id": 0, "Sequence number": 10552390, "Fwd thread id": 1, "Ev Idx": 723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937256705.232, "dur": 6.274, + "args": { + "External id": 978133,"Sequence number": 10552390, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 724 + } + }, + { + "ph": "f", "id": 76, "pid": 2338706, "tid": 2379422, "ts": 6345937256705.232, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937256706.262, "dur": 5.028, + "args": { + "External id": 978134,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937256709.109, "dur": 1.710, + "args": { + "External id": 978135,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937256709.964, "dur": 0.702, + "args": { + "External id": 978136,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937256720.292, "dur": 4.545, + "args": { + "External id": 978137,"Record function id": 0, "Ev Idx": 728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937256721.435, "dur": 2.861, + "args": { + "External id": 978138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937256722.480, "dur": 1.483, + "args": { + "External id": 978139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937256722.862, "dur": 0.980, + "args": { + "External id": 978140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937256730.718, "dur": 488.274, + "args": { + "External id": 978141,"Record function id": 0, "Sequence number": 10552389, "Fwd thread id": 1, "Ev Idx": 732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937256731.935, "dur": 447.212, + "args": { + "External id": 978142,"Sequence number": 10552389, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 733 + } + }, + { + "ph": "f", "id": 77, "pid": 2338706, "tid": 2379422, "ts": 6345937256731.935, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2379422, + "ts": 6345937256754.527, "dur": 31.411, + "args": { + "External id": 978143,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345937256756.063, "dur": 29.678, + "args": { + "External id": 978144,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937256758.610, "dur": 5.608, + "args": { + "External id": 978145,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937256761.012, "dur": 2.645, + "args": { + "External id": 978146,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937256765.414, "dur": 19.848, + "args": { + "External id": 978147,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937256796.437, "dur": 8.106, + "args": { + "External id": 978148,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937256801.402, "dur": 2.984, + "args": { + "External id": 978149,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937256808.163, "dur": 1.526, + "args": { + "External id": 978150,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937256808.859, "dur": 0.738, + "args": { + "External id": 978151,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937256821.746, "dur": 3.053, + "args": { + "External id": 978152,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937256837.524, "dur": 2.573, + "args": { + "External id": 978153,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937257000.064, "dur": 1.822, + "args": { + "External id": 978154,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937257006.157, "dur": 92.842, + "args": { + "External id": 978155,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937257039.926, "dur": 1.010, + "args": { + "External id": 978156,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937257107.552, "dur": 30.746, + "args": { + "External id": 978157,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937257109.230, "dur": 28.858, + "args": { + "External id": 978158,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937257113.763, "dur": 6.349, + "args": { + "External id": 978159,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937257121.630, "dur": 15.810, + "args": { + "External id": 978160,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345937257145.659, "dur": 4.302, + "args": { + "External id": 978161,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937257148.312, "dur": 1.509, + "args": { + "External id": 978162,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937257158.102, "dur": 4.907, + "args": { + "External id": 978163,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937257159.079, "dur": 3.833, + "args": { + "External id": 978164,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937257165.084, "dur": 3.719, + "args": { + "External id": 978165,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937257167.585, "dur": 1.129, + "args": { + "External id": 978166,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345937257200.581, "dur": 16.993, + "args": { + "External id": 978167,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937257233.197, "dur": 8.953, + "args": { + "External id": 978168,"Record function id": 0, "Ev Idx": 759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937257235.173, "dur": 6.137, + "args": { + "External id": 978169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937257237.521, "dur": 2.705, + "args": { + "External id": 978170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937257238.543, "dur": 1.583, + "args": { + "External id": 978171,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937257245.711, "dur": 8.726, + "args": { + "External id": 978172,"Record function id": 0, "Sequence number": 10552388, "Fwd thread id": 1, "Ev Idx": 763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937257246.881, "dur": 4.316, + "args": { + "External id": 978173,"Sequence number": 10552388, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 764 + } + }, + { + "ph": "f", "id": 78, "pid": 2338706, "tid": 2379422, "ts": 6345937257246.881, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937257248.751, "dur": 2.188, + "args": { + "External id": 978174,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937257249.450, "dur": 1.346, + "args": { + "External id": 978175,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937257257.819, "dur": 136.128, + "args": { + "External id": 978176,"Record function id": 0, "Sequence number": 10552387, "Fwd thread id": 1, "Ev Idx": 767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937257258.888, "dur": 126.251, + "args": { + "External id": 978177,"Sequence number": 10552387, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 768 + } + }, + { + "ph": "f", "id": 79, "pid": 2338706, "tid": 2379422, "ts": 6345937257258.888, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937257265.515, "dur": 4.971, + "args": { + "External id": 978178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937257267.329, "dur": 2.486, + "args": { + "External id": 978179,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937257268.603, "dur": 1.004, + "args": { + "External id": 978180,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937257271.517, "dur": 64.250, + "args": { + "External id": 978181,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937257337.164, "dur": 5.613, + "args": { + "External id": 978182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937257337.743, "dur": 4.443, + "args": { + "External id": 978183,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937257341.162, "dur": 0.877, + "args": { + "External id": 978184,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937257346.703, "dur": 2.564, + "args": { + "External id": 978185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937257347.557, "dur": 1.053, + "args": { + "External id": 978186,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937257348.148, "dur": 0.376, + "args": { + "External id": 978187,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937257349.765, "dur": 34.616, + "args": { + "External id": 978188,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937257398.784, "dur": 11.942, + "args": { + "External id": 978189,"Record function id": 0, "Sequence number": 10552386, "Fwd thread id": 1, "Ev Idx": 780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937257399.735, "dur": 7.857, + "args": { + "External id": 978190,"Sequence number": 10552386, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 781 + } + }, + { + "ph": "f", "id": 80, "pid": 2338706, "tid": 2379422, "ts": 6345937257399.735, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937257401.282, "dur": 6.146, + "args": { + "External id": 978191,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937257404.315, "dur": 2.962, + "args": { + "External id": 978192,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937257415.881, "dur": 11.449, + "args": { + "External id": 978193,"Record function id": 0, "Sequence number": 10552385, "Fwd thread id": 1, "Ev Idx": 784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937257416.752, "dur": 8.092, + "args": { + "External id": 978194,"Sequence number": 10552385, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 785 + } + }, + { + "ph": "f", "id": 81, "pid": 2338706, "tid": 2379422, "ts": 6345937257416.752, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937257417.602, "dur": 7.003, + "args": { + "External id": 978195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937257418.101, "dur": 6.005, + "args": { + "External id": 978196,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937257421.677, "dur": 2.250, + "args": { + "External id": 978197,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937257431.188, "dur": 5.296, + "args": { + "External id": 978198,"Record function id": 0, "Ev Idx": 789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937257432.428, "dur": 3.519, + "args": { + "External id": 978199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937257433.760, "dur": 1.835, + "args": { + "External id": 978200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937257434.568, "dur": 0.932, + "args": { + "External id": 978201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937257439.561, "dur": 6.790, + "args": { + "External id": 978202,"Record function id": 0, "Sequence number": 10552384, "Fwd thread id": 1, "Ev Idx": 793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937257440.514, "dur": 3.589, + "args": { + "External id": 978203,"Sequence number": 10552384, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 794 + } + }, + { + "ph": "f", "id": 82, "pid": 2338706, "tid": 2379422, "ts": 6345937257440.514, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937257442.310, "dur": 1.640, + "args": { + "External id": 978204,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937257442.686, "dur": 1.124, + "args": { + "External id": 978205,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937257450.548, "dur": 360.100, + "args": { + "External id": 978206,"Record function id": 0, "Sequence number": 10552383, "Fwd thread id": 1, "Ev Idx": 797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937257453.848, "dur": 335.614, + "args": { + "External id": 978207,"Sequence number": 10552383, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 798 + } + }, + { + "ph": "f", "id": 83, "pid": 2338706, "tid": 2379422, "ts": 6345937257453.848, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937257473.362, "dur": 5.945, + "args": { + "External id": 978208,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937257475.426, "dur": 3.353, + "args": { + "External id": 978209,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937257481.215, "dur": 5.567, + "args": { + "External id": 978210,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937257484.641, "dur": 1.881, + "args": { + "External id": 978211,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937257488.199, "dur": 3.825, + "args": { + "External id": 978212,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937257489.458, "dur": 2.353, + "args": { + "External id": 978213,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937257518.878, "dur": 245.923, + "args": { + "External id": 978214,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937257601.068, "dur": 5.293, + "args": { + "External id": 978215,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937257610.389, "dur": 5.490, + "args": { + "External id": 978216,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937257616.862, "dur": 2.317, + "args": { + "External id": 978217,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937257620.087, "dur": 2.031, + "args": { + "External id": 978218,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937257665.051, "dur": 2.588, + "args": { + "External id": 978219,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937257666.209, "dur": 1.337, + "args": { + "External id": 978220,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937257671.222, "dur": 28.454, + "args": { + "External id": 978221,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937257679.983, "dur": 0.940, + "args": { + "External id": 978222,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937257700.645, "dur": 1.218, + "args": { + "External id": 978223,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937257701.352, "dur": 0.431, + "args": { + "External id": 978224,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937257702.534, "dur": 17.237, + "args": { + "External id": 978225,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937257706.962, "dur": 0.836, + "args": { + "External id": 978226,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937257777.218, "dur": 3.262, + "args": { + "External id": 978227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937257783.452, "dur": 0.776, + "args": { + "External id": 978228,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937257786.032, "dur": 0.700, + "args": { + "External id": 978229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937257817.799, "dur": 296.286, + "args": { + "External id": 978230,"Record function id": 0, "Sequence number": 10552382, "Fwd thread id": 1, "Ev Idx": 821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937257819.365, "dur": 283.943, + "args": { + "External id": 978231,"Sequence number": 10552382, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 822 + } + }, + { + "ph": "f", "id": 84, "pid": 2338706, "tid": 2379422, "ts": 6345937257819.365, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345937257838.842, "dur": 42.862, + "args": { + "External id": 978232,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937257844.062, "dur": 2.889, + "args": { + "External id": 978233,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937257848.508, "dur": 32.592, + "args": { + "External id": 978234,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937257891.658, "dur": 3.757, + "args": { + "External id": 978235,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937257892.994, "dur": 2.154, + "args": { + "External id": 978236,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937258124.855, "dur": 199.472, + "args": { + "External id": 978237,"Record function id": 0, "Sequence number": 10552381, "Fwd thread id": 1, "Ev Idx": 828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937258127.233, "dur": 189.642, + "args": { + "External id": 978238,"Sequence number": 10552381, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 829 + } + }, + { + "ph": "f", "id": 85, "pid": 2338706, "tid": 2379422, "ts": 6345937258127.233, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345937258141.345, "dur": 62.933, + "args": { + "External id": 978239,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937258146.881, "dur": 6.474, + "args": { + "External id": 978240,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937258157.496, "dur": 45.977, + "args": { + "External id": 978241,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937258212.199, "dur": 4.254, + "args": { + "External id": 978242,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937258213.698, "dur": 2.358, + "args": { + "External id": 978243,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258331.927, "dur": 17.780, + "args": { + "External id": 978244,"Record function id": 0, "Sequence number": 10552380, "Fwd thread id": 1, "Ev Idx": 835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258333.801, "dur": 11.892, + "args": { + "External id": 978245,"Sequence number": 10552380, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 836 + } + }, + { + "ph": "f", "id": 86, "pid": 2338706, "tid": 2379422, "ts": 6345937258333.801, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937258336.701, "dur": 8.725, + "args": { + "External id": 978246,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937258340.561, "dur": 4.680, + "args": { + "External id": 978247,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258353.367, "dur": 6.695, + "args": { + "External id": 978248,"Record function id": 0, "Sequence number": 10552379, "Fwd thread id": 1, "Ev Idx": 839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258354.086, "dur": 3.428, + "args": { + "External id": 978249,"Sequence number": 10552379, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 840 + } + }, + { + "ph": "f", "id": 87, "pid": 2338706, "tid": 2379422, "ts": 6345937258354.086, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937258355.748, "dur": 1.608, + "args": { + "External id": 978250,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937258356.402, "dur": 0.850, + "args": { + "External id": 978251,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258365.845, "dur": 10.348, + "args": { + "External id": 978252,"Record function id": 0, "Sequence number": 10552378, "Fwd thread id": 1, "Ev Idx": 843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258366.626, "dur": 7.526, + "args": { + "External id": 978253,"Sequence number": 10552378, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 844 + } + }, + { + "ph": "f", "id": 88, "pid": 2338706, "tid": 2379422, "ts": 6345937258366.626, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937258367.822, "dur": 6.169, + "args": { + "External id": 978254,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937258372.974, "dur": 0.892, + "args": { + "External id": 978255,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258379.583, "dur": 6.472, + "args": { + "External id": 978256,"Record function id": 0, "Sequence number": 10552377, "Fwd thread id": 1, "Ev Idx": 847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258380.682, "dur": 3.022, + "args": { + "External id": 978257,"Sequence number": 10552377, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 848 + } + }, + { + "ph": "f", "id": 89, "pid": 2338706, "tid": 2379422, "ts": 6345937258380.682, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937258381.924, "dur": 1.624, + "args": { + "External id": 978258,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937258382.436, "dur": 0.980, + "args": { + "External id": 978259,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258389.485, "dur": 156.202, + "args": { + "External id": 978260,"Record function id": 0, "Sequence number": 10552376, "Fwd thread id": 1, "Ev Idx": 851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258390.508, "dur": 146.476, + "args": { + "External id": 978261,"Sequence number": 10552376, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 852 + } + }, + { + "ph": "f", "id": 90, "pid": 2338706, "tid": 2379422, "ts": 6345937258390.508, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937258395.289, "dur": 8.266, + "args": { + "External id": 978262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937258399.371, "dur": 3.506, + "args": { + "External id": 978263,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937258401.172, "dur": 1.446, + "args": { + "External id": 978264,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937258405.339, "dur": 68.642, + "args": { + "External id": 978265,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937258475.296, "dur": 8.593, + "args": { + "External id": 978266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937258476.002, "dur": 7.233, + "args": { + "External id": 978267,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937258479.535, "dur": 3.538, + "args": { + "External id": 978268,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937258487.847, "dur": 2.781, + "args": { + "External id": 978269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937258489.101, "dur": 1.099, + "args": { + "External id": 978270,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937258489.655, "dur": 0.418, + "args": { + "External id": 978271,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937258491.384, "dur": 44.914, + "args": { + "External id": 978272,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258550.558, "dur": 9.979, + "args": { + "External id": 978273,"Record function id": 0, "Sequence number": 10552375, "Fwd thread id": 1, "Ev Idx": 864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258551.662, "dur": 6.720, + "args": { + "External id": 978274,"Sequence number": 10552375, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 865 + } + }, + { + "ph": "f", "id": 91, "pid": 2338706, "tid": 2379422, "ts": 6345937258551.662, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937258553.613, "dur": 4.607, + "args": { + "External id": 978275,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937258556.991, "dur": 1.076, + "args": { + "External id": 978276,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258564.232, "dur": 11.229, + "args": { + "External id": 978277,"Record function id": 0, "Sequence number": 10552374, "Fwd thread id": 1, "Ev Idx": 868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258565.307, "dur": 6.604, + "args": { + "External id": 978278,"Sequence number": 10552374, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 869 + } + }, + { + "ph": "f", "id": 92, "pid": 2338706, "tid": 2379422, "ts": 6345937258565.307, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937258568.799, "dur": 2.896, + "args": { + "External id": 978279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937258569.296, "dur": 1.803, + "args": { + "External id": 978280,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937258570.372, "dur": 0.638, + "args": { + "External id": 978281,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937258580.946, "dur": 9.703, + "args": { + "External id": 978282,"Record function id": 0, "Ev Idx": 873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937258582.475, "dur": 7.262, + "args": { + "External id": 978283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937258585.288, "dur": 4.119, + "args": { + "External id": 978284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937258586.654, "dur": 2.653, + "args": { + "External id": 978285,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258593.916, "dur": 8.612, + "args": { + "External id": 978286,"Record function id": 0, "Sequence number": 10552373, "Fwd thread id": 1, "Ev Idx": 877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258594.798, "dur": 5.247, + "args": { + "External id": 978287,"Sequence number": 10552373, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 878 + } + }, + { + "ph": "f", "id": 93, "pid": 2338706, "tid": 2379422, "ts": 6345937258594.798, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937258596.079, "dur": 3.822, + "args": { + "External id": 978288,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937258599.084, "dur": 0.725, + "args": { + "External id": 978289,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258605.760, "dur": 101.193, + "args": { + "External id": 978290,"Record function id": 0, "Sequence number": 10552372, "Fwd thread id": 1, "Ev Idx": 881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258608.765, "dur": 90.508, + "args": { + "External id": 978291,"Sequence number": 10552372, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 882 + } + }, + { + "ph": "f", "id": 94, "pid": 2338706, "tid": 2379422, "ts": 6345937258608.765, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937258611.294, "dur": 2.255, + "args": { + "External id": 978292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937258611.955, "dur": 1.193, + "args": { + "External id": 978293,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937258612.614, "dur": 0.414, + "args": { + "External id": 978294,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937258614.321, "dur": 28.020, + "args": { + "External id": 978295,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937258643.478, "dur": 10.685, + "args": { + "External id": 978296,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937258648.831, "dur": 4.819, + "args": { + "External id": 978297,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937258651.849, "dur": 1.602, + "args": { + "External id": 978298,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937258655.698, "dur": 5.796, + "args": { + "External id": 978299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937258656.377, "dur": 4.689, + "args": { + "External id": 978300,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937258660.551, "dur": 0.437, + "args": { + "External id": 978301,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937258662.136, "dur": 36.325, + "args": { + "External id": 978302,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258711.573, "dur": 49.639, + "args": { + "External id": 978303,"Record function id": 0, "Sequence number": 10552371, "Fwd thread id": 1, "Ev Idx": 894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258712.511, "dur": 3.880, + "args": { + "External id": 978304,"Sequence number": 10552371, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 895 + } + }, + { + "ph": "f", "id": 95, "pid": 2338706, "tid": 2379422, "ts": 6345937258712.511, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937258714.438, "dur": 1.815, + "args": { + "External id": 978305,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937258715.064, "dur": 1.065, + "args": { + "External id": 978306,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345937258736.451, "dur": 22.413, + "args": { + "External id": 978307,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258767.533, "dur": 27.062, + "args": { + "External id": 978308,"Record function id": 0, "Sequence number": 10552370, "Fwd thread id": 1, "Ev Idx": 899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258770.835, "dur": 20.448, + "args": { + "External id": 978309,"Sequence number": 10552370, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 900 + } + }, + { + "ph": "f", "id": 96, "pid": 2338706, "tid": 2379422, "ts": 6345937258770.835, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937258772.271, "dur": 18.801, + "args": { + "External id": 978310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937258773.048, "dur": 17.253, + "args": { + "External id": 978311,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937258789.338, "dur": 0.717, + "args": { + "External id": 978312,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937258798.964, "dur": 6.997, + "args": { + "External id": 978313,"Record function id": 0, "Ev Idx": 904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937258800.169, "dur": 5.206, + "args": { + "External id": 978314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937258801.264, "dur": 3.717, + "args": { + "External id": 978315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937258801.724, "dur": 3.161, + "args": { + "External id": 978316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258811.280, "dur": 9.300, + "args": { + "External id": 978317,"Record function id": 0, "Sequence number": 10552369, "Fwd thread id": 1, "Ev Idx": 908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258812.168, "dur": 5.610, + "args": { + "External id": 978318,"Sequence number": 10552369, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 909 + } + }, + { + "ph": "f", "id": 97, "pid": 2338706, "tid": 2379422, "ts": 6345937258812.168, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937258813.632, "dur": 4.004, + "args": { + "External id": 978319,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937258816.438, "dur": 1.090, + "args": { + "External id": 978320,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258823.853, "dur": 103.483, + "args": { + "External id": 978321,"Record function id": 0, "Sequence number": 10552368, "Fwd thread id": 1, "Ev Idx": 912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258824.803, "dur": 91.362, + "args": { + "External id": 978322,"Sequence number": 10552368, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 913 + } + }, + { + "ph": "f", "id": 98, "pid": 2338706, "tid": 2379422, "ts": 6345937258824.803, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937258827.655, "dur": 2.116, + "args": { + "External id": 978323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937258828.041, "dur": 1.312, + "args": { + "External id": 978324,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937258828.785, "dur": 0.455, + "args": { + "External id": 978325,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937258830.564, "dur": 37.524, + "args": { + "External id": 978326,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937258871.670, "dur": 5.276, + "args": { + "External id": 978327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937258872.302, "dur": 4.138, + "args": { + "External id": 978328,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937258875.336, "dur": 0.984, + "args": { + "External id": 978329,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937258878.234, "dur": 2.442, + "args": { + "External id": 978330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937258879.120, "dur": 0.928, + "args": { + "External id": 978331,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937258879.640, "dur": 0.310, + "args": { + "External id": 978332,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937258883.792, "dur": 31.763, + "args": { + "External id": 978333,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258931.778, "dur": 31.590, + "args": { + "External id": 978334,"Record function id": 0, "Sequence number": 10552367, "Fwd thread id": 1, "Ev Idx": 925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258932.608, "dur": 8.437, + "args": { + "External id": 978335,"Sequence number": 10552367, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 926 + } + }, + { + "ph": "f", "id": 99, "pid": 2338706, "tid": 2379422, "ts": 6345937258932.608, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937258934.123, "dur": 6.757, + "args": { + "External id": 978336,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937258936.585, "dur": 4.123, + "args": { + "External id": 978337,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345937258943.945, "dur": 16.793, + "args": { + "External id": 978338,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258967.187, "dur": 9.843, + "args": { + "External id": 978339,"Record function id": 0, "Sequence number": 10552366, "Fwd thread id": 1, "Ev Idx": 930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937258968.194, "dur": 6.525, + "args": { + "External id": 978340,"Sequence number": 10552366, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 931 + } + }, + { + "ph": "f", "id": 100, "pid": 2338706, "tid": 2379422, "ts": 6345937258968.194, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937258969.195, "dur": 5.336, + "args": { + "External id": 978341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937258970.272, "dur": 3.762, + "args": { + "External id": 978342,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937258971.442, "dur": 2.440, + "args": { + "External id": 978343,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937258980.727, "dur": 4.526, + "args": { + "External id": 978344,"Record function id": 0, "Ev Idx": 935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937258981.835, "dur": 2.944, + "args": { + "External id": 978345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937258982.946, "dur": 1.447, + "args": { + "External id": 978346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937258983.405, "dur": 0.841, + "args": { + "External id": 978347,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937258989.698, "dur": 463.180, + "args": { + "External id": 978348,"Record function id": 0, "Sequence number": 10552365, "Fwd thread id": 1, "Ev Idx": 939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937258991.016, "dur": 426.214, + "args": { + "External id": 978349,"Sequence number": 10552365, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 940 + } + }, + { + "ph": "f", "id": 101, "pid": 2338706, "tid": 2379422, "ts": 6345937258991.016, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937259050.949, "dur": 50.071, + "args": { + "External id": 978350,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937259098.607, "dur": 2.107, + "args": { + "External id": 978351,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937259118.218, "dur": 5.847, + "args": { + "External id": 978352,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937259134.396, "dur": 2.501, + "args": { + "External id": 978353,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937259298.925, "dur": 2.218, + "args": { + "External id": 978354,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937259305.268, "dur": 39.268, + "args": { + "External id": 978355,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937259317.613, "dur": 1.112, + "args": { + "External id": 978356,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937259350.860, "dur": 37.330, + "args": { + "External id": 978357,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937259352.602, "dur": 35.342, + "args": { + "External id": 978358,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937259359.769, "dur": 6.201, + "args": { + "External id": 978359,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937259367.536, "dur": 19.825, + "args": { + "External id": 978360,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345937259393.283, "dur": 4.977, + "args": { + "External id": 978361,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937259394.370, "dur": 3.772, + "args": { + "External id": 978362,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937259407.287, "dur": 1.696, + "args": { + "External id": 978363,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937259407.838, "dur": 1.021, + "args": { + "External id": 978364,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345937259431.251, "dur": 17.230, + "args": { + "External id": 978365,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937259465.983, "dur": 9.322, + "args": { + "External id": 978366,"Record function id": 0, "Ev Idx": 957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937259468.295, "dur": 6.209, + "args": { + "External id": 978367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937259470.945, "dur": 2.615, + "args": { + "External id": 978368,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937259471.831, "dur": 1.638, + "args": { + "External id": 978369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937259479.049, "dur": 8.111, + "args": { + "External id": 978370,"Record function id": 0, "Sequence number": 10552364, "Fwd thread id": 1, "Ev Idx": 961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937259480.749, "dur": 1.931, + "args": { + "External id": 978371,"Sequence number": 10552364, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 962 + } + }, + { + "ph": "f", "id": 102, "pid": 2338706, "tid": 2379422, "ts": 6345937259480.749, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937259491.026, "dur": 420.944, + "args": { + "External id": 978372,"Record function id": 0, "Sequence number": 10552363, "Fwd thread id": 1, "Ev Idx": 963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937259492.372, "dur": 405.346, + "args": { + "External id": 978373,"Sequence number": 10552363, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 964 + } + }, + { + "ph": "f", "id": 103, "pid": 2338706, "tid": 2379422, "ts": 6345937259492.372, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937259526.224, "dur": 9.960, + "args": { + "External id": 978374,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937259532.700, "dur": 3.242, + "args": { + "External id": 978375,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937259539.757, "dur": 4.377, + "args": { + "External id": 978376,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937259541.125, "dur": 2.500, + "args": { + "External id": 978377,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937259542.708, "dur": 0.749, + "args": { + "External id": 978378,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2379422, + "ts": 6345937259547.791, "dur": 91.940, + "args": { + "External id": 978379,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937259551.145, "dur": 2.946, + "args": { + "External id": 978380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937259551.669, "dur": 2.034, + "args": { + "External id": 978381,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937259552.659, "dur": 0.941, + "args": { + "External id": 978382,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2379422, + "ts": 6345937259555.497, "dur": 83.717, + "args": { + "External id": 978383,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937259557.020, "dur": 81.599, + "args": { + "External id": 978384,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345937259643.773, "dur": 4.664, + "args": { + "External id": 978385,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937259646.838, "dur": 1.452, + "args": { + "External id": 978386,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937259682.390, "dur": 5.354, + "args": { + "External id": 978387,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937259688.613, "dur": 4.197, + "args": { + "External id": 978388,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937259693.571, "dur": 1.749, + "args": { + "External id": 978389,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937259728.664, "dur": 4.914, + "args": { + "External id": 978390,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937259729.658, "dur": 3.703, + "args": { + "External id": 978391,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338706, "tid": 2379422, + "ts": 6345937259756.353, "dur": 120.300, + "args": { + "External id": 978392,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2379422, + "ts": 6345937259761.579, "dur": 4.625, + "args": { + "External id": 978393,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937259764.505, "dur": 0.827, + "args": { + "External id": 978394,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937259769.777, "dur": 3.950, + "args": { + "External id": 978395,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937259772.298, "dur": 0.581, + "args": { + "External id": 978396,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2379422, + "ts": 6345937259774.998, "dur": 3.507, + "args": { + "External id": 978397,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937259777.711, "dur": 0.449, + "args": { + "External id": 978398,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937259779.158, "dur": 4.346, + "args": { + "External id": 978399,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937259782.494, "dur": 0.446, + "args": { + "External id": 978400,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937259786.876, "dur": 5.553, + "args": { + "External id": 978401,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937259791.737, "dur": 0.398, + "args": { + "External id": 978402,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937259793.166, "dur": 9.639, + "args": { + "External id": 978403,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937259798.613, "dur": 4.048, + "args": { + "External id": 978404,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937259805.353, "dur": 1.981, + "args": { + "External id": 978405,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937259806.533, "dur": 0.504, + "args": { + "External id": 978406,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937259810.109, "dur": 4.478, + "args": { + "External id": 978407,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937259810.656, "dur": 3.831, + "args": { + "External id": 978408,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345937259815.796, "dur": 49.091, + "args": { + "External id": 978409,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 1000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937259866.473, "dur": 0.990, + "args": { + "External id": 978410,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 1001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937259867.993, "dur": 4.332, + "args": { + "External id": 978411,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 1002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937259871.274, "dur": 0.482, + "args": { + "External id": 978412,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 1003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937259874.678, "dur": 0.860, + "args": { + "External id": 978413,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 1004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937259920.410, "dur": 8.242, + "args": { + "External id": 978414,"Record function id": 0, "Ev Idx": 1005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937259922.587, "dur": 5.298, + "args": { + "External id": 978415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937259924.225, "dur": 2.693, + "args": { + "External id": 978416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937259924.956, "dur": 1.853, + "args": { + "External id": 978417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937259932.384, "dur": 10.794, + "args": { + "External id": 978418,"Record function id": 0, "Sequence number": 10552362, "Fwd thread id": 1, "Ev Idx": 1009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937259933.431, "dur": 5.654, + "args": { + "External id": 978419,"Sequence number": 10552362, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1010 + } + }, + { + "ph": "f", "id": 104, "pid": 2338706, "tid": 2379422, "ts": 6345937259933.431, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937259934.916, "dur": 3.935, + "args": { + "External id": 978420,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937259938.033, "dur": 0.681, + "args": { + "External id": 978421,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937259947.152, "dur": 271.981, + "args": { + "External id": 978422,"Record function id": 0, "Sequence number": 10552361, "Fwd thread id": 1, "Ev Idx": 1013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937259948.038, "dur": 261.141, + "args": { + "External id": 978423,"Sequence number": 10552361, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1014 + } + }, + { + "ph": "f", "id": 105, "pid": 2338706, "tid": 2379422, "ts": 6345937259948.038, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937259952.331, "dur": 7.857, + "args": { + "External id": 978424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937259953.575, "dur": 6.086, + "args": { + "External id": 978425,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937259956.976, "dur": 2.562, + "args": { + "External id": 978426,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937259961.310, "dur": 41.457, + "args": { + "External id": 978427,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937260004.096, "dur": 24.333, + "args": { + "External id": 978428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937260004.756, "dur": 1.971, + "args": { + "External id": 978429,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937260005.736, "dur": 0.833, + "args": { + "External id": 978430,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937260031.174, "dur": 128.759, + "args": { + "External id": 978431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937260035.380, "dur": 123.372, + "args": { + "External id": 978432,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937260157.138, "dur": 0.989, + "args": { + "External id": 978433,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937260161.129, "dur": 46.460, + "args": { + "External id": 978434,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937260229.329, "dur": 7.374, + "args": { + "External id": 978435,"Record function id": 0, "Sequence number": 10552360, "Fwd thread id": 1, "Ev Idx": 1026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937260230.688, "dur": 4.478, + "args": { + "External id": 978436,"Sequence number": 10552360, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1027 + } + }, + { + "ph": "f", "id": 106, "pid": 2338706, "tid": 2379422, "ts": 6345937260230.688, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937260232.607, "dur": 2.392, + "args": { + "External id": 978437,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937260233.275, "dur": 1.559, + "args": { + "External id": 978438,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937260240.193, "dur": 10.517, + "args": { + "External id": 978439,"Record function id": 0, "Sequence number": 10552359, "Fwd thread id": 1, "Ev Idx": 1030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937260241.131, "dur": 6.818, + "args": { + "External id": 978440,"Sequence number": 10552359, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1031 + } + }, + { + "ph": "f", "id": 107, "pid": 2338706, "tid": 2379422, "ts": 6345937260241.131, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937260242.185, "dur": 5.558, + "args": { + "External id": 978441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937260245.125, "dur": 2.181, + "args": { + "External id": 978442,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937260246.644, "dur": 0.439, + "args": { + "External id": 978443,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937260257.559, "dur": 8.875, + "args": { + "External id": 978444,"Record function id": 0, "Ev Idx": 1035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937260258.887, "dur": 6.836, + "args": { + "External id": 978445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937260260.503, "dur": 4.898, + "args": { + "External id": 978446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937260261.164, "dur": 4.122, + "args": { + "External id": 978447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937260269.619, "dur": 6.370, + "args": { + "External id": 978448,"Record function id": 0, "Sequence number": 10552358, "Fwd thread id": 1, "Ev Idx": 1039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937260270.730, "dur": 2.842, + "args": { + "External id": 978449,"Sequence number": 10552358, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1040 + } + }, + { + "ph": "f", "id": 108, "pid": 2338706, "tid": 2379422, "ts": 6345937260270.730, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937260271.843, "dur": 1.576, + "args": { + "External id": 978450,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937260272.481, "dur": 0.851, + "args": { + "External id": 978451,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937260278.961, "dur": 102.566, + "args": { + "External id": 978452,"Record function id": 0, "Sequence number": 10552357, "Fwd thread id": 1, "Ev Idx": 1043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937260282.422, "dur": 89.183, + "args": { + "External id": 978453,"Sequence number": 10552357, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1044 + } + }, + { + "ph": "f", "id": 109, "pid": 2338706, "tid": 2379422, "ts": 6345937260282.422, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937260286.306, "dur": 2.198, + "args": { + "External id": 978454,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937260286.781, "dur": 1.308, + "args": { + "External id": 978455,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937260287.416, "dur": 0.523, + "args": { + "External id": 978456,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937260289.465, "dur": 36.141, + "args": { + "External id": 978457,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937260326.622, "dur": 7.637, + "args": { + "External id": 978458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937260327.080, "dur": 6.637, + "args": { + "External id": 978459,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937260332.817, "dur": 0.784, + "args": { + "External id": 978460,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937260335.592, "dur": 3.466, + "args": { + "External id": 978461,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937260337.137, "dur": 1.483, + "args": { + "External id": 978462,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937260337.999, "dur": 0.559, + "args": { + "External id": 978463,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937260339.667, "dur": 31.165, + "args": { + "External id": 978464,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937260385.782, "dur": 36.392, + "args": { + "External id": 978465,"Record function id": 0, "Sequence number": 10552356, "Fwd thread id": 1, "Ev Idx": 1056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937260386.714, "dur": 5.269, + "args": { + "External id": 978466,"Sequence number": 10552356, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1057 + } + }, + { + "ph": "f", "id": 110, "pid": 2338706, "tid": 2379422, "ts": 6345937260386.714, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937260388.020, "dur": 3.823, + "args": { + "External id": 978467,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937260390.767, "dur": 0.893, + "args": { + "External id": 978468,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345937260394.872, "dur": 23.766, + "args": { + "External id": 978469,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937260425.867, "dur": 9.776, + "args": { + "External id": 978470,"Record function id": 0, "Sequence number": 10552355, "Fwd thread id": 1, "Ev Idx": 1061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937260429.245, "dur": 3.805, + "args": { + "External id": 978471,"Sequence number": 10552355, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1062 + } + }, + { + "ph": "f", "id": 111, "pid": 2338706, "tid": 2379422, "ts": 6345937260429.245, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937260430.013, "dur": 2.843, + "args": { + "External id": 978472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937260430.962, "dur": 1.414, + "args": { + "External id": 978473,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937260431.854, "dur": 0.374, + "args": { + "External id": 978474,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937260439.412, "dur": 4.641, + "args": { + "External id": 978475,"Record function id": 0, "Ev Idx": 1066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937260440.504, "dur": 2.993, + "args": { + "External id": 978476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937260441.265, "dur": 1.731, + "args": { + "External id": 978477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937260441.961, "dur": 0.905, + "args": { + "External id": 978478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937260448.545, "dur": 438.056, + "args": { + "External id": 978479,"Record function id": 0, "Sequence number": 10552354, "Fwd thread id": 1, "Ev Idx": 1070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937260449.766, "dur": 401.322, + "args": { + "External id": 978480,"Sequence number": 10552354, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 1071 + } + }, + { + "ph": "f", "id": 112, "pid": 2338706, "tid": 2379422, "ts": 6345937260449.766, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2379422, + "ts": 6345937260477.186, "dur": 34.648, + "args": { + "External id": 978481,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345937260478.613, "dur": 33.022, + "args": { + "External id": 978482,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937260484.094, "dur": 5.806, + "args": { + "External id": 978483,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 1074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937260486.643, "dur": 2.637, + "args": { + "External id": 978484,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937260491.310, "dur": 19.864, + "args": { + "External id": 978485,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937260527.412, "dur": 1.697, + "args": { + "External id": 978486,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937260527.907, "dur": 1.054, + "args": { + "External id": 978487,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937260532.760, "dur": 1.359, + "args": { + "External id": 978488,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937260533.339, "dur": 0.677, + "args": { + "External id": 978489,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937260549.733, "dur": 2.087, + "args": { + "External id": 978490,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937260565.467, "dur": 2.226, + "args": { + "External id": 978491,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937260732.741, "dur": 3.843, + "args": { + "External id": 978492,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937260740.679, "dur": 37.429, + "args": { + "External id": 978493,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937260755.285, "dur": 0.754, + "args": { + "External id": 978494,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937260784.309, "dur": 33.080, + "args": { + "External id": 978495,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937260786.167, "dur": 30.900, + "args": { + "External id": 978496,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937260790.450, "dur": 6.022, + "args": { + "External id": 978497,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937260800.353, "dur": 16.166, + "args": { + "External id": 978498,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345937260821.520, "dur": 2.344, + "args": { + "External id": 978499,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937260822.448, "dur": 1.288, + "args": { + "External id": 978500,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937260830.866, "dur": 4.830, + "args": { + "External id": 978501,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937260834.496, "dur": 1.117, + "args": { + "External id": 978502,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937260837.845, "dur": 1.780, + "args": { + "External id": 978503,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937260838.426, "dur": 1.105, + "args": { + "External id": 978504,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345937260867.759, "dur": 17.257, + "args": { + "External id": 978505,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937260896.469, "dur": 7.560, + "args": { + "External id": 978506,"Record function id": 0, "Ev Idx": 1097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937260898.471, "dur": 4.948, + "args": { + "External id": 978507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937260900.165, "dur": 2.257, + "args": { + "External id": 978508,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937260901.039, "dur": 1.287, + "args": { + "External id": 978509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937260907.664, "dur": 10.223, + "args": { + "External id": 978510,"Record function id": 0, "Sequence number": 10552353, "Fwd thread id": 1, "Ev Idx": 1101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937260908.794, "dur": 6.399, + "args": { + "External id": 978511,"Sequence number": 10552353, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1102 + } + }, + { + "ph": "f", "id": 113, "pid": 2338706, "tid": 2379422, "ts": 6345937260908.794, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937260910.322, "dur": 4.663, + "args": { + "External id": 978512,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937260913.376, "dur": 1.475, + "args": { + "External id": 978513,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937260921.614, "dur": 237.617, + "args": { + "External id": 978514,"Record function id": 0, "Sequence number": 10552352, "Fwd thread id": 1, "Ev Idx": 1105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937260922.843, "dur": 227.050, + "args": { + "External id": 978515,"Sequence number": 10552352, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1106 + } + }, + { + "ph": "f", "id": 114, "pid": 2338706, "tid": 2379422, "ts": 6345937260922.843, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937260929.890, "dur": 3.995, + "args": { + "External id": 978516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937260931.036, "dur": 2.337, + "args": { + "External id": 978517,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937260932.415, "dur": 0.833, + "args": { + "External id": 978518,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937260935.169, "dur": 56.835, + "args": { + "External id": 978519,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937260993.181, "dur": 5.677, + "args": { + "External id": 978520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937260993.909, "dur": 4.390, + "args": { + "External id": 978521,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937260997.223, "dur": 0.949, + "args": { + "External id": 978522,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937261000.489, "dur": 98.690, + "args": { + "External id": 978523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937261089.635, "dur": 8.327, + "args": { + "External id": 978524,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937261094.636, "dur": 3.048, + "args": { + "External id": 978525,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937261100.095, "dur": 48.616, + "args": { + "External id": 978526,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937261169.239, "dur": 9.016, + "args": { + "External id": 978527,"Record function id": 0, "Sequence number": 10552351, "Fwd thread id": 1, "Ev Idx": 1118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937261170.585, "dur": 4.636, + "args": { + "External id": 978528,"Sequence number": 10552351, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1119 + } + }, + { + "ph": "f", "id": 115, "pid": 2338706, "tid": 2379422, "ts": 6345937261170.585, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937261172.504, "dur": 2.560, + "args": { + "External id": 978529,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937261173.448, "dur": 1.462, + "args": { + "External id": 978530,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937261181.647, "dur": 13.205, + "args": { + "External id": 978531,"Record function id": 0, "Sequence number": 10552350, "Fwd thread id": 1, "Ev Idx": 1122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937261182.525, "dur": 10.389, + "args": { + "External id": 978532,"Sequence number": 10552350, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1123 + } + }, + { + "ph": "f", "id": 116, "pid": 2338706, "tid": 2379422, "ts": 6345937261182.525, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937261183.949, "dur": 8.727, + "args": { + "External id": 978533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937261184.667, "dur": 7.455, + "args": { + "External id": 978534,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937261189.640, "dur": 2.269, + "args": { + "External id": 978535,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937261199.149, "dur": 6.779, + "args": { + "External id": 978536,"Record function id": 0, "Ev Idx": 1127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937261200.639, "dur": 4.625, + "args": { + "External id": 978537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937261202.415, "dur": 2.414, + "args": { + "External id": 978538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937261203.138, "dur": 1.585, + "args": { + "External id": 978539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937261209.039, "dur": 6.836, + "args": { + "External id": 978540,"Record function id": 0, "Sequence number": 10552349, "Fwd thread id": 1, "Ev Idx": 1131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937261210.167, "dur": 3.527, + "args": { + "External id": 978541,"Sequence number": 10552349, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1132 + } + }, + { + "ph": "f", "id": 117, "pid": 2338706, "tid": 2379422, "ts": 6345937261210.167, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937261211.549, "dur": 1.996, + "args": { + "External id": 978542,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937261212.254, "dur": 1.167, + "args": { + "External id": 978543,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937261220.135, "dur": 433.856, + "args": { + "External id": 978544,"Record function id": 0, "Sequence number": 10552348, "Fwd thread id": 1, "Ev Idx": 1135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937261221.313, "dur": 406.025, + "args": { + "External id": 978545,"Sequence number": 10552348, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1136 + } + }, + { + "ph": "f", "id": 118, "pid": 2338706, "tid": 2379422, "ts": 6345937261221.313, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937261242.112, "dur": 11.177, + "args": { + "External id": 978546,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937261248.557, "dur": 4.225, + "args": { + "External id": 978547,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937261255.857, "dur": 2.686, + "args": { + "External id": 978548,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937261256.549, "dur": 1.790, + "args": { + "External id": 978549,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937261268.802, "dur": 9.220, + "args": { + "External id": 978550,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937261271.707, "dur": 6.107, + "args": { + "External id": 978551,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937261305.194, "dur": 295.792, + "args": { + "External id": 978552,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937261388.099, "dur": 4.713, + "args": { + "External id": 978553,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937261394.319, "dur": 2.435, + "args": { + "External id": 978554,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937261399.744, "dur": 2.020, + "args": { + "External id": 978555,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937261402.689, "dur": 3.736, + "args": { + "External id": 978556,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937261495.166, "dur": 1.871, + "args": { + "External id": 978557,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937261495.696, "dur": 1.216, + "args": { + "External id": 978558,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937261498.641, "dur": 26.222, + "args": { + "External id": 978559,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937261502.830, "dur": 0.846, + "args": { + "External id": 978560,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937261531.494, "dur": 1.143, + "args": { + "External id": 978561,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937261532.024, "dur": 0.527, + "args": { + "External id": 978562,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937261535.696, "dur": 19.647, + "args": { + "External id": 978563,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937261539.201, "dur": 0.482, + "args": { + "External id": 978564,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937261614.442, "dur": 3.475, + "args": { + "External id": 978565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937261620.972, "dur": 0.599, + "args": { + "External id": 978566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937261623.477, "dur": 0.625, + "args": { + "External id": 978567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937261663.268, "dur": 240.456, + "args": { + "External id": 978568,"Record function id": 0, "Sequence number": 10552347, "Fwd thread id": 1, "Ev Idx": 1159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937261664.785, "dur": 230.949, + "args": { + "External id": 978569,"Sequence number": 10552347, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1160 + } + }, + { + "ph": "f", "id": 119, "pid": 2338706, "tid": 2379422, "ts": 6345937261664.785, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345937261686.693, "dur": 50.570, + "args": { + "External id": 978570,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937261689.759, "dur": 5.549, + "args": { + "External id": 978571,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937261696.848, "dur": 39.643, + "args": { + "External id": 978572,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 1163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937261748.130, "dur": 6.778, + "args": { + "External id": 978573,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937261751.899, "dur": 2.713, + "args": { + "External id": 978574,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937261911.210, "dur": 250.284, + "args": { + "External id": 978575,"Record function id": 0, "Sequence number": 10552346, "Fwd thread id": 1, "Ev Idx": 1166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937261913.029, "dur": 238.954, + "args": { + "External id": 978576,"Sequence number": 10552346, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1167 + } + }, + { + "ph": "f", "id": 120, "pid": 2338706, "tid": 2379422, "ts": 6345937261913.029, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345937261924.927, "dur": 40.907, + "args": { + "External id": 978577,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937261927.153, "dur": 2.819, + "args": { + "External id": 978578,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937261930.869, "dur": 34.322, + "args": { + "External id": 978579,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 1170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937261972.697, "dur": 8.729, + "args": { + "External id": 978580,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937261978.226, "dur": 2.875, + "args": { + "External id": 978581,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262171.957, "dur": 16.873, + "args": { + "External id": 978582,"Record function id": 0, "Sequence number": 10552345, "Fwd thread id": 1, "Ev Idx": 1173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262173.868, "dur": 11.616, + "args": { + "External id": 978583,"Sequence number": 10552345, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1174 + } + }, + { + "ph": "f", "id": 121, "pid": 2338706, "tid": 2379422, "ts": 6345937262173.868, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937262177.253, "dur": 7.980, + "args": { + "External id": 978584,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937262178.815, "dur": 6.213, + "args": { + "External id": 978585,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262192.378, "dur": 9.806, + "args": { + "External id": 978586,"Record function id": 0, "Sequence number": 10552344, "Fwd thread id": 1, "Ev Idx": 1177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262193.338, "dur": 5.574, + "args": { + "External id": 978587,"Sequence number": 10552344, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1178 + } + }, + { + "ph": "f", "id": 122, "pid": 2338706, "tid": 2379422, "ts": 6345937262193.338, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937262194.987, "dur": 3.772, + "args": { + "External id": 978588,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937262197.707, "dur": 0.947, + "args": { + "External id": 978589,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262206.097, "dur": 7.768, + "args": { + "External id": 978590,"Record function id": 0, "Sequence number": 10552343, "Fwd thread id": 1, "Ev Idx": 1181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262207.212, "dur": 3.283, + "args": { + "External id": 978591,"Sequence number": 10552343, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1182 + } + }, + { + "ph": "f", "id": 123, "pid": 2338706, "tid": 2379422, "ts": 6345937262207.212, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937262208.682, "dur": 1.673, + "args": { + "External id": 978592,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937262209.317, "dur": 0.938, + "args": { + "External id": 978593,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262217.291, "dur": 11.829, + "args": { + "External id": 978594,"Record function id": 0, "Sequence number": 10552342, "Fwd thread id": 1, "Ev Idx": 1185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262218.633, "dur": 7.446, + "args": { + "External id": 978595,"Sequence number": 10552342, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1186 + } + }, + { + "ph": "f", "id": 124, "pid": 2338706, "tid": 2379422, "ts": 6345937262218.633, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937262221.974, "dur": 3.960, + "args": { + "External id": 978596,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937262224.740, "dur": 1.104, + "args": { + "External id": 978597,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262235.471, "dur": 192.430, + "args": { + "External id": 978598,"Record function id": 0, "Sequence number": 10552341, "Fwd thread id": 1, "Ev Idx": 1189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262236.691, "dur": 181.670, + "args": { + "External id": 978599,"Sequence number": 10552341, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1190 + } + }, + { + "ph": "f", "id": 125, "pid": 2338706, "tid": 2379422, "ts": 6345937262236.691, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937262243.191, "dur": 9.819, + "args": { + "External id": 978600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937262245.705, "dur": 6.561, + "args": { + "External id": 978601,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937262247.944, "dur": 4.024, + "args": { + "External id": 978602,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937262257.079, "dur": 74.109, + "args": { + "External id": 978603,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937262332.834, "dur": 10.611, + "args": { + "External id": 978604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937262338.263, "dur": 4.296, + "args": { + "External id": 978605,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937262339.896, "dur": 2.443, + "args": { + "External id": 978606,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937262345.662, "dur": 8.615, + "args": { + "External id": 978607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937262347.359, "dur": 6.282, + "args": { + "External id": 978608,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937262352.845, "dur": 0.674, + "args": { + "External id": 978609,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937262354.947, "dur": 62.570, + "args": { + "External id": 978610,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262433.259, "dur": 7.518, + "args": { + "External id": 978611,"Record function id": 0, "Sequence number": 10552340, "Fwd thread id": 1, "Ev Idx": 1202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262434.450, "dur": 4.371, + "args": { + "External id": 978612,"Sequence number": 10552340, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1203 + } + }, + { + "ph": "f", "id": 126, "pid": 2338706, "tid": 2379422, "ts": 6345937262434.450, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937262436.103, "dur": 2.577, + "args": { + "External id": 978613,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937262437.021, "dur": 1.400, + "args": { + "External id": 978614,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262443.939, "dur": 13.362, + "args": { + "External id": 978615,"Record function id": 0, "Sequence number": 10552339, "Fwd thread id": 1, "Ev Idx": 1206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262444.952, "dur": 9.519, + "args": { + "External id": 978616,"Sequence number": 10552339, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1207 + } + }, + { + "ph": "f", "id": 127, "pid": 2338706, "tid": 2379422, "ts": 6345937262444.952, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937262446.033, "dur": 8.231, + "args": { + "External id": 978617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937262446.752, "dur": 6.949, + "args": { + "External id": 978618,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937262452.844, "dur": 0.721, + "args": { + "External id": 978619,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937262463.772, "dur": 11.601, + "args": { + "External id": 978620,"Record function id": 0, "Ev Idx": 1211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937262465.182, "dur": 9.393, + "args": { + "External id": 978621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937262467.541, "dur": 6.666, + "args": { + "External id": 978622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937262468.915, "dur": 5.190, + "args": { + "External id": 978623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262478.913, "dur": 7.246, + "args": { + "External id": 978624,"Record function id": 0, "Sequence number": 10552338, "Fwd thread id": 1, "Ev Idx": 1215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262479.886, "dur": 3.553, + "args": { + "External id": 978625,"Sequence number": 10552338, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1216 + } + }, + { + "ph": "f", "id": 128, "pid": 2338706, "tid": 2379422, "ts": 6345937262479.886, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937262481.586, "dur": 1.686, + "args": { + "External id": 978626,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937262482.308, "dur": 0.821, + "args": { + "External id": 978627,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262489.295, "dur": 111.354, + "args": { + "External id": 978628,"Record function id": 0, "Sequence number": 10552337, "Fwd thread id": 1, "Ev Idx": 1219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262490.050, "dur": 102.940, + "args": { + "External id": 978629,"Sequence number": 10552337, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1220 + } + }, + { + "ph": "f", "id": 129, "pid": 2338706, "tid": 2379422, "ts": 6345937262490.050, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937262495.987, "dur": 3.047, + "args": { + "External id": 978630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937262496.853, "dur": 1.722, + "args": { + "External id": 978631,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937262497.820, "dur": 0.649, + "args": { + "External id": 978632,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937262500.090, "dur": 27.128, + "args": { + "External id": 978633,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937262528.121, "dur": 7.940, + "args": { + "External id": 978634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937262528.757, "dur": 6.748, + "args": { + "External id": 978635,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937262534.421, "dur": 0.963, + "args": { + "External id": 978636,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937262537.344, "dur": 3.078, + "args": { + "External id": 978637,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937262538.795, "dur": 1.240, + "args": { + "External id": 978638,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937262539.395, "dur": 0.570, + "args": { + "External id": 978639,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937262540.860, "dur": 51.240, + "args": { + "External id": 978640,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262605.180, "dur": 37.429, + "args": { + "External id": 978641,"Record function id": 0, "Sequence number": 10552336, "Fwd thread id": 1, "Ev Idx": 1232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262606.364, "dur": 6.039, + "args": { + "External id": 978642,"Sequence number": 10552336, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1233 + } + }, + { + "ph": "f", "id": 130, "pid": 2338706, "tid": 2379422, "ts": 6345937262606.364, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937262607.813, "dur": 4.436, + "args": { + "External id": 978643,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937262610.734, "dur": 1.360, + "args": { + "External id": 978644,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345937262615.658, "dur": 24.636, + "args": { + "External id": 978645,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262646.315, "dur": 9.707, + "args": { + "External id": 978646,"Record function id": 0, "Sequence number": 10552335, "Fwd thread id": 1, "Ev Idx": 1237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262649.777, "dur": 4.066, + "args": { + "External id": 978647,"Sequence number": 10552335, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1238 + } + }, + { + "ph": "f", "id": 131, "pid": 2338706, "tid": 2379422, "ts": 6345937262649.777, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937262650.746, "dur": 2.887, + "args": { + "External id": 978648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937262651.395, "dur": 1.759, + "args": { + "External id": 978649,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937262652.481, "dur": 0.542, + "args": { + "External id": 978650,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937262659.880, "dur": 5.733, + "args": { + "External id": 978651,"Record function id": 0, "Ev Idx": 1242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937262661.273, "dur": 3.820, + "args": { + "External id": 978652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937262662.693, "dur": 2.040, + "args": { + "External id": 978653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937262663.401, "dur": 1.239, + "args": { + "External id": 978654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262668.649, "dur": 9.995, + "args": { + "External id": 978655,"Record function id": 0, "Sequence number": 10552334, "Fwd thread id": 1, "Ev Idx": 1246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262669.670, "dur": 6.536, + "args": { + "External id": 978656,"Sequence number": 10552334, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1247 + } + }, + { + "ph": "f", "id": 132, "pid": 2338706, "tid": 2379422, "ts": 6345937262669.670, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937262672.560, "dur": 3.510, + "args": { + "External id": 978657,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937262675.107, "dur": 0.835, + "args": { + "External id": 978658,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262681.578, "dur": 108.848, + "args": { + "External id": 978659,"Record function id": 0, "Sequence number": 10552333, "Fwd thread id": 1, "Ev Idx": 1250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262682.580, "dur": 99.717, + "args": { + "External id": 978660,"Sequence number": 10552333, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1251 + } + }, + { + "ph": "f", "id": 133, "pid": 2338706, "tid": 2379422, "ts": 6345937262682.580, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937262684.945, "dur": 2.395, + "args": { + "External id": 978661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937262685.465, "dur": 1.477, + "args": { + "External id": 978662,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937262686.141, "dur": 0.700, + "args": { + "External id": 978663,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937262688.209, "dur": 41.632, + "args": { + "External id": 978664,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937262732.963, "dur": 2.950, + "args": { + "External id": 978665,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937262733.629, "dur": 1.803, + "args": { + "External id": 978666,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937262734.470, "dur": 0.813, + "args": { + "External id": 978667,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937262737.270, "dur": 5.356, + "args": { + "External id": 978668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937262738.593, "dur": 3.487, + "args": { + "External id": 978669,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937262741.590, "dur": 0.412, + "args": { + "External id": 978670,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937262745.135, "dur": 36.380, + "args": { + "External id": 978671,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262794.916, "dur": 23.825, + "args": { + "External id": 978672,"Record function id": 0, "Sequence number": 10552332, "Fwd thread id": 1, "Ev Idx": 1263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262795.829, "dur": 3.612, + "args": { + "External id": 978673,"Sequence number": 10552332, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1264 + } + }, + { + "ph": "f", "id": 134, "pid": 2338706, "tid": 2379422, "ts": 6345937262795.829, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937262797.274, "dur": 2.040, + "args": { + "External id": 978674,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937262798.012, "dur": 1.177, + "args": { + "External id": 978675,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345937262801.705, "dur": 14.715, + "args": { + "External id": 978676,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262822.304, "dur": 9.690, + "args": { + "External id": 978677,"Record function id": 0, "Sequence number": 10552331, "Fwd thread id": 1, "Ev Idx": 1268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937262823.266, "dur": 6.451, + "args": { + "External id": 978678,"Sequence number": 10552331, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1269 + } + }, + { + "ph": "f", "id": 135, "pid": 2338706, "tid": 2379422, "ts": 6345937262823.266, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937262824.188, "dur": 5.329, + "args": { + "External id": 978679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937262824.826, "dur": 4.202, + "args": { + "External id": 978680,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937262828.380, "dur": 0.507, + "args": { + "External id": 978681,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937262835.987, "dur": 5.366, + "args": { + "External id": 978682,"Record function id": 0, "Ev Idx": 1273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937262837.277, "dur": 3.573, + "args": { + "External id": 978683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937262838.633, "dur": 1.767, + "args": { + "External id": 978684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937262839.064, "dur": 1.255, + "args": { + "External id": 978685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937262847.934, "dur": 430.981, + "args": { + "External id": 978686,"Record function id": 0, "Sequence number": 10552330, "Fwd thread id": 1, "Ev Idx": 1277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937262849.092, "dur": 395.490, + "args": { + "External id": 978687,"Sequence number": 10552330, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1278 + } + }, + { + "ph": "f", "id": 136, "pid": 2338706, "tid": 2379422, "ts": 6345937262849.092, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937262880.814, "dur": 4.264, + "args": { + "External id": 978688,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937262883.947, "dur": 0.977, + "args": { + "External id": 978689,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937262899.703, "dur": 7.683, + "args": { + "External id": 978690,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937262916.656, "dur": 2.458, + "args": { + "External id": 978691,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937263130.756, "dur": 3.097, + "args": { + "External id": 978692,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937263138.507, "dur": 39.098, + "args": { + "External id": 978693,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937263152.847, "dur": 0.930, + "args": { + "External id": 978694,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937263183.697, "dur": 37.518, + "args": { + "External id": 978695,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937263185.760, "dur": 35.178, + "args": { + "External id": 978696,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937263194.368, "dur": 5.665, + "args": { + "External id": 978697,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937263201.588, "dur": 18.782, + "args": { + "External id": 978698,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345937263225.900, "dur": 3.024, + "args": { + "External id": 978699,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937263227.258, "dur": 1.523, + "args": { + "External id": 978700,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937263234.977, "dur": 1.882, + "args": { + "External id": 978701,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937263235.888, "dur": 0.884, + "args": { + "External id": 978702,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345937263260.269, "dur": 14.719, + "args": { + "External id": 978703,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937263292.195, "dur": 11.714, + "args": { + "External id": 978704,"Record function id": 0, "Ev Idx": 1295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937263296.980, "dur": 6.175, + "args": { + "External id": 978705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937263299.664, "dur": 2.424, + "args": { + "External id": 978706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937263300.506, "dur": 1.467, + "args": { + "External id": 978707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937263308.024, "dur": 7.696, + "args": { + "External id": 978708,"Record function id": 0, "Sequence number": 10552329, "Fwd thread id": 1, "Ev Idx": 1299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937263309.614, "dur": 1.600, + "args": { + "External id": 978709,"Sequence number": 10552329, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1300 + } + }, + { + "ph": "f", "id": 137, "pid": 2338706, "tid": 2379422, "ts": 6345937263309.614, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937263319.686, "dur": 420.518, + "args": { + "External id": 978710,"Record function id": 0, "Sequence number": 10552328, "Fwd thread id": 1, "Ev Idx": 1301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937263320.706, "dur": 404.552, + "args": { + "External id": 978711,"Sequence number": 10552328, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1302 + } + }, + { + "ph": "f", "id": 138, "pid": 2338706, "tid": 2379422, "ts": 6345937263320.706, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937263351.608, "dur": 10.332, + "args": { + "External id": 978712,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937263358.457, "dur": 3.126, + "args": { + "External id": 978713,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 1304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937263365.244, "dur": 7.302, + "args": { + "External id": 978714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937263366.795, "dur": 5.014, + "args": { + "External id": 978715,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937263370.796, "dur": 0.850, + "args": { + "External id": 978716,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2379422, + "ts": 6345937263376.186, "dur": 89.794, + "args": { + "External id": 978717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 1308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937263377.112, "dur": 5.819, + "args": { + "External id": 978718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 1309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937263377.723, "dur": 4.677, + "args": { + "External id": 978719,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937263381.542, "dur": 0.748, + "args": { + "External id": 978720,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2379422, + "ts": 6345937263384.114, "dur": 81.444, + "args": { + "External id": 978721,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 1312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937263385.429, "dur": 79.056, + "args": { + "External id": 978722,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 1313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345937263469.530, "dur": 2.312, + "args": { + "External id": 978723,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 1314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937263470.507, "dur": 1.200, + "args": { + "External id": 978724,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 1315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937263504.684, "dur": 6.500, + "args": { + "External id": 978725,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937263514.629, "dur": 3.735, + "args": { + "External id": 978726,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937263521.625, "dur": 2.280, + "args": { + "External id": 978727,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937263553.052, "dur": 2.127, + "args": { + "External id": 978728,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937263553.734, "dur": 1.298, + "args": { + "External id": 978729,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338706, "tid": 2379422, + "ts": 6345937263576.418, "dur": 131.079, + "args": { + "External id": 978730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 1321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2379422, + "ts": 6345937263584.064, "dur": 6.956, + "args": { + "External id": 978731,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937263589.698, "dur": 0.647, + "args": { + "External id": 978732,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937263592.233, "dur": 6.403, + "args": { + "External id": 978733,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 1324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937263597.276, "dur": 0.569, + "args": { + "External id": 978734,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 1325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2379422, + "ts": 6345937263600.068, "dur": 1.709, + "args": { + "External id": 978735,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 1326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937263600.983, "dur": 0.485, + "args": { + "External id": 978736,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937263606.790, "dur": 2.293, + "args": { + "External id": 978737,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 1328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937263607.963, "dur": 0.517, + "args": { + "External id": 978738,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 1329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937263615.364, "dur": 6.693, + "args": { + "External id": 978739,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 1330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937263619.020, "dur": 2.693, + "args": { + "External id": 978740,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 1331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937263622.780, "dur": 7.139, + "args": { + "External id": 978741,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 1332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937263628.061, "dur": 1.688, + "args": { + "External id": 978742,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 1333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937263630.612, "dur": 4.087, + "args": { + "External id": 978743,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 1334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937263633.901, "dur": 0.501, + "args": { + "External id": 978744,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 1335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937263635.223, "dur": 4.132, + "args": { + "External id": 978745,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 1336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937263635.561, "dur": 3.679, + "args": { + "External id": 978746,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 1337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345937263642.676, "dur": 49.520, + "args": { + "External id": 978747,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 1338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937263696.324, "dur": 1.260, + "args": { + "External id": 978748,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 1339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345937263698.337, "dur": 4.695, + "args": { + "External id": 978749,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 1340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937263701.842, "dur": 0.622, + "args": { + "External id": 978750,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 1341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937263705.529, "dur": 0.969, + "args": { + "External id": 978751,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 1342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937263748.623, "dur": 30.592, + "args": { + "External id": 978752,"Record function id": 0, "Ev Idx": 1343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937263750.544, "dur": 27.954, + "args": { + "External id": 978753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937263752.446, "dur": 24.949, + "args": { + "External id": 978754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937263771.170, "dur": 6.016, + "args": { + "External id": 978755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937263783.246, "dur": 10.517, + "args": { + "External id": 978756,"Record function id": 0, "Sequence number": 10552327, "Fwd thread id": 1, "Ev Idx": 1347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937263784.193, "dur": 6.757, + "args": { + "External id": 978757,"Sequence number": 10552327, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1348 + } + }, + { + "ph": "f", "id": 139, "pid": 2338706, "tid": 2379422, "ts": 6345937263784.193, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937263786.407, "dur": 4.370, + "args": { + "External id": 978758,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937263789.404, "dur": 1.214, + "args": { + "External id": 978759,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937263797.328, "dur": 114.356, + "args": { + "External id": 978760,"Record function id": 0, "Sequence number": 10552326, "Fwd thread id": 1, "Ev Idx": 1351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937263798.315, "dur": 106.034, + "args": { + "External id": 978761,"Sequence number": 10552326, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1352 + } + }, + { + "ph": "f", "id": 140, "pid": 2338706, "tid": 2379422, "ts": 6345937263798.315, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937263801.935, "dur": 3.442, + "args": { + "External id": 978762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937263803.162, "dur": 1.709, + "args": { + "External id": 978763,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937263804.171, "dur": 0.530, + "args": { + "External id": 978764,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937263809.325, "dur": 43.272, + "args": { + "External id": 978765,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937263853.865, "dur": 6.719, + "args": { + "External id": 978766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937263854.675, "dur": 5.034, + "args": { + "External id": 978767,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937263858.473, "dur": 1.112, + "args": { + "External id": 978768,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937263862.156, "dur": 5.839, + "args": { + "External id": 978769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937263863.551, "dur": 3.893, + "args": { + "External id": 978770,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937263867.011, "dur": 0.353, + "args": { + "External id": 978771,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937263868.420, "dur": 35.230, + "args": { + "External id": 978772,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937263916.510, "dur": 8.649, + "args": { + "External id": 978773,"Record function id": 0, "Sequence number": 10552325, "Fwd thread id": 1, "Ev Idx": 1364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937263917.650, "dur": 6.243, + "args": { + "External id": 978774,"Sequence number": 10552325, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1365 + } + }, + { + "ph": "f", "id": 141, "pid": 2338706, "tid": 2379422, "ts": 6345937263917.650, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937263919.255, "dur": 4.490, + "args": { + "External id": 978775,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937263922.304, "dur": 1.287, + "args": { + "External id": 978776,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937263928.724, "dur": 10.705, + "args": { + "External id": 978777,"Record function id": 0, "Sequence number": 10552324, "Fwd thread id": 1, "Ev Idx": 1368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937263929.762, "dur": 6.584, + "args": { + "External id": 978778,"Sequence number": 10552324, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1369 + } + }, + { + "ph": "f", "id": 142, "pid": 2338706, "tid": 2379422, "ts": 6345937263929.762, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937263931.129, "dur": 4.980, + "args": { + "External id": 978779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937263931.777, "dur": 3.856, + "args": { + "External id": 978780,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937263932.647, "dur": 2.866, + "args": { + "External id": 978781,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937263943.118, "dur": 7.468, + "args": { + "External id": 978782,"Record function id": 0, "Ev Idx": 1373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937263944.456, "dur": 5.601, + "args": { + "External id": 978783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937263945.389, "dur": 4.151, + "args": { + "External id": 978784,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937263948.457, "dur": 0.989, + "args": { + "External id": 978785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937263953.773, "dur": 8.154, + "args": { + "External id": 978786,"Record function id": 0, "Sequence number": 10552323, "Fwd thread id": 1, "Ev Idx": 1377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937263954.818, "dur": 4.521, + "args": { + "External id": 978787,"Sequence number": 10552323, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1378 + } + }, + { + "ph": "f", "id": 143, "pid": 2338706, "tid": 2379422, "ts": 6345937263954.818, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937263957.797, "dur": 1.390, + "args": { + "External id": 978788,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937263958.431, "dur": 0.607, + "args": { + "External id": 978789,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937263965.105, "dur": 163.284, + "args": { + "External id": 978790,"Record function id": 0, "Sequence number": 10552322, "Fwd thread id": 1, "Ev Idx": 1381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937263966.054, "dur": 150.617, + "args": { + "External id": 978791,"Sequence number": 10552322, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1382 + } + }, + { + "ph": "f", "id": 144, "pid": 2338706, "tid": 2379422, "ts": 6345937263966.054, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937263968.975, "dur": 4.568, + "args": { + "External id": 978792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937263969.426, "dur": 3.671, + "args": { + "External id": 978793,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937263972.378, "dur": 0.611, + "args": { + "External id": 978794,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937263974.364, "dur": 51.585, + "args": { + "External id": 978795,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937264028.081, "dur": 4.504, + "args": { + "External id": 978796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937264028.981, "dur": 2.859, + "args": { + "External id": 978797,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937264030.557, "dur": 1.181, + "args": { + "External id": 978798,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937264034.355, "dur": 8.020, + "args": { + "External id": 978799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937264038.819, "dur": 3.147, + "args": { + "External id": 978800,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937264041.575, "dur": 0.307, + "args": { + "External id": 978801,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937264042.962, "dur": 72.174, + "args": { + "External id": 978802,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937264138.592, "dur": 40.697, + "args": { + "External id": 978803,"Record function id": 0, "Sequence number": 10552321, "Fwd thread id": 1, "Ev Idx": 1394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937264139.764, "dur": 7.822, + "args": { + "External id": 978804,"Sequence number": 10552321, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1395 + } + }, + { + "ph": "f", "id": 145, "pid": 2338706, "tid": 2379422, "ts": 6345937264139.764, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937264141.828, "dur": 5.609, + "args": { + "External id": 978805,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937264142.789, "dur": 4.480, + "args": { + "External id": 978806,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345937264150.408, "dur": 25.487, + "args": { + "External id": 978807,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937264183.495, "dur": 14.036, + "args": { + "External id": 978808,"Record function id": 0, "Sequence number": 10552320, "Fwd thread id": 1, "Ev Idx": 1399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937264184.720, "dur": 9.840, + "args": { + "External id": 978809,"Sequence number": 10552320, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1400 + } + }, + { + "ph": "f", "id": 146, "pid": 2338706, "tid": 2379422, "ts": 6345937264184.720, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937264186.074, "dur": 8.233, + "args": { + "External id": 978810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937264186.791, "dur": 6.982, + "args": { + "External id": 978811,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937264193.004, "dur": 0.645, + "args": { + "External id": 978812,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937264201.760, "dur": 6.053, + "args": { + "External id": 978813,"Record function id": 0, "Ev Idx": 1404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937264203.464, "dur": 3.794, + "args": { + "External id": 978814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937264204.683, "dur": 2.115, + "args": { + "External id": 978815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937264205.261, "dur": 1.442, + "args": { + "External id": 978816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937264211.758, "dur": 421.237, + "args": { + "External id": 978817,"Record function id": 0, "Sequence number": 10552319, "Fwd thread id": 1, "Ev Idx": 1408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937264212.912, "dur": 383.407, + "args": { + "External id": 978818,"Sequence number": 10552319, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 1409 + } + }, + { + "ph": "f", "id": 147, "pid": 2338706, "tid": 2379422, "ts": 6345937264212.912, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2379422, + "ts": 6345937264237.729, "dur": 33.331, + "args": { + "External id": 978819,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345937264239.219, "dur": 31.639, + "args": { + "External id": 978820,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937264242.098, "dur": 6.084, + "args": { + "External id": 978821,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 1412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937264244.522, "dur": 3.071, + "args": { + "External id": 978822,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937264249.290, "dur": 21.108, + "args": { + "External id": 978823,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937264285.927, "dur": 4.091, + "args": { + "External id": 978824,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937264288.759, "dur": 1.119, + "args": { + "External id": 978825,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937264293.962, "dur": 1.616, + "args": { + "External id": 978826,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937264294.645, "dur": 0.841, + "args": { + "External id": 978827,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937264308.146, "dur": 2.875, + "args": { + "External id": 978828,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937264323.887, "dur": 4.474, + "args": { + "External id": 978829,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937264481.041, "dur": 1.888, + "args": { + "External id": 978830,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937264489.325, "dur": 35.687, + "args": { + "External id": 978831,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937264497.934, "dur": 0.788, + "args": { + "External id": 978832,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937264531.063, "dur": 32.817, + "args": { + "External id": 978833,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937264532.561, "dur": 30.964, + "args": { + "External id": 978834,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937264538.642, "dur": 4.038, + "args": { + "External id": 978835,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937264544.262, "dur": 18.671, + "args": { + "External id": 978836,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345937264570.607, "dur": 1.999, + "args": { + "External id": 978837,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937264571.592, "dur": 0.878, + "args": { + "External id": 978838,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937264579.068, "dur": 2.140, + "args": { + "External id": 978839,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937264580.083, "dur": 1.023, + "args": { + "External id": 978840,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937264583.195, "dur": 4.421, + "args": { + "External id": 978841,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937264586.231, "dur": 1.300, + "args": { + "External id": 978842,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345937264614.240, "dur": 17.291, + "args": { + "External id": 978843,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937264642.322, "dur": 9.415, + "args": { + "External id": 978844,"Record function id": 0, "Ev Idx": 1435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937264644.235, "dur": 6.677, + "args": { + "External id": 978845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937264645.814, "dur": 3.994, + "args": { + "External id": 978846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937264648.502, "dur": 1.201, + "args": { + "External id": 978847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937264655.283, "dur": 7.494, + "args": { + "External id": 978848,"Record function id": 0, "Sequence number": 10552318, "Fwd thread id": 1, "Ev Idx": 1439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937264656.148, "dur": 3.978, + "args": { + "External id": 978849,"Sequence number": 10552318, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1440 + } + }, + { + "ph": "f", "id": 148, "pid": 2338706, "tid": 2379422, "ts": 6345937264656.148, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937264658.058, "dur": 1.854, + "args": { + "External id": 978850,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937264658.622, "dur": 1.160, + "args": { + "External id": 978851,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937264666.448, "dur": 135.332, + "args": { + "External id": 978852,"Record function id": 0, "Sequence number": 10552317, "Fwd thread id": 1, "Ev Idx": 1443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937264667.256, "dur": 127.375, + "args": { + "External id": 978853,"Sequence number": 10552317, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1444 + } + }, + { + "ph": "f", "id": 149, "pid": 2338706, "tid": 2379422, "ts": 6345937264667.256, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937264670.606, "dur": 6.564, + "args": { + "External id": 978854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937264671.966, "dur": 4.676, + "args": { + "External id": 978855,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937264675.826, "dur": 0.663, + "args": { + "External id": 978856,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937264678.351, "dur": 63.433, + "args": { + "External id": 978857,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937264743.217, "dur": 7.950, + "args": { + "External id": 978858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937264743.952, "dur": 6.510, + "args": { + "External id": 978859,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937264747.333, "dur": 3.011, + "args": { + "External id": 978860,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937264752.617, "dur": 5.176, + "args": { + "External id": 978861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937264754.026, "dur": 3.320, + "args": { + "External id": 978862,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937264756.842, "dur": 0.438, + "args": { + "External id": 978863,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937264758.292, "dur": 35.644, + "args": { + "External id": 978864,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937264806.431, "dur": 9.964, + "args": { + "External id": 978865,"Record function id": 0, "Sequence number": 10552316, "Fwd thread id": 1, "Ev Idx": 1456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937264807.471, "dur": 5.760, + "args": { + "External id": 978866,"Sequence number": 10552316, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1457 + } + }, + { + "ph": "f", "id": 150, "pid": 2338706, "tid": 2379422, "ts": 6345937264807.471, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937264808.733, "dur": 4.366, + "args": { + "External id": 978867,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937264811.905, "dur": 1.060, + "args": { + "External id": 978868,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937264821.969, "dur": 9.539, + "args": { + "External id": 978869,"Record function id": 0, "Sequence number": 10552315, "Fwd thread id": 1, "Ev Idx": 1460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937264822.753, "dur": 6.285, + "args": { + "External id": 978870,"Sequence number": 10552315, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1461 + } + }, + { + "ph": "f", "id": 151, "pid": 2338706, "tid": 2379422, "ts": 6345937264822.753, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937264823.693, "dur": 5.152, + "args": { + "External id": 978871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937264824.509, "dur": 3.885, + "args": { + "External id": 978872,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937264827.893, "dur": 0.351, + "args": { + "External id": 978873,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937264835.624, "dur": 5.336, + "args": { + "External id": 978874,"Record function id": 0, "Ev Idx": 1465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937264837.290, "dur": 3.178, + "args": { + "External id": 978875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937264838.342, "dur": 1.769, + "args": { + "External id": 978876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937264839.034, "dur": 0.954, + "args": { + "External id": 978877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937264843.938, "dur": 6.204, + "args": { + "External id": 978878,"Record function id": 0, "Sequence number": 10552314, "Fwd thread id": 1, "Ev Idx": 1469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937264844.731, "dur": 3.066, + "args": { + "External id": 978879,"Sequence number": 10552314, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1470 + } + }, + { + "ph": "f", "id": 152, "pid": 2338706, "tid": 2379422, "ts": 6345937264844.731, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937264846.080, "dur": 1.575, + "args": { + "External id": 978880,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937264846.462, "dur": 1.048, + "args": { + "External id": 978881,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937264856.506, "dur": 434.372, + "args": { + "External id": 978882,"Record function id": 0, "Sequence number": 10552313, "Fwd thread id": 1, "Ev Idx": 1473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937264857.735, "dur": 408.806, + "args": { + "External id": 978883,"Sequence number": 10552313, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1474 + } + }, + { + "ph": "f", "id": 153, "pid": 2338706, "tid": 2379422, "ts": 6345937264857.735, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937264874.100, "dur": 5.844, + "args": { + "External id": 978884,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937264876.449, "dur": 3.044, + "args": { + "External id": 978885,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937264881.805, "dur": 7.828, + "args": { + "External id": 978886,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937264884.909, "dur": 4.496, + "args": { + "External id": 978887,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937264891.073, "dur": 3.386, + "args": { + "External id": 978888,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937264891.969, "dur": 2.269, + "args": { + "External id": 978889,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937264920.785, "dur": 316.545, + "args": { + "External id": 978890,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937265006.534, "dur": 27.520, + "args": { + "External id": 978891,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937265036.948, "dur": 2.426, + "args": { + "External id": 978892,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937265040.411, "dur": 3.353, + "args": { + "External id": 978893,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937265044.805, "dur": 1.827, + "args": { + "External id": 978894,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937265130.820, "dur": 5.754, + "args": { + "External id": 978895,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937265134.246, "dur": 2.064, + "args": { + "External id": 978896,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937265138.335, "dur": 28.581, + "args": { + "External id": 978897,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937265145.142, "dur": 1.134, + "args": { + "External id": 978898,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937265170.540, "dur": 1.245, + "args": { + "External id": 978899,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937265171.192, "dur": 0.522, + "args": { + "External id": 978900,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937265172.438, "dur": 15.268, + "args": { + "External id": 978901,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937265173.766, "dur": 2.859, + "args": { + "External id": 978902,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937265252.122, "dur": 4.174, + "args": { + "External id": 978903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937265259.573, "dur": 0.776, + "args": { + "External id": 978904,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345937265262.887, "dur": 0.691, + "args": { + "External id": 978905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937265302.567, "dur": 237.436, + "args": { + "External id": 978906,"Record function id": 0, "Sequence number": 10552312, "Fwd thread id": 1, "Ev Idx": 1497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937265304.396, "dur": 228.379, + "args": { + "External id": 978907,"Sequence number": 10552312, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1498 + } + }, + { + "ph": "f", "id": 154, "pid": 2338706, "tid": 2379422, "ts": 6345937265304.396, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345937265326.321, "dur": 47.182, + "args": { + "External id": 978908,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937265332.158, "dur": 3.908, + "args": { + "External id": 978909,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937265337.755, "dur": 35.110, + "args": { + "External id": 978910,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 1501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937265384.233, "dur": 4.789, + "args": { + "External id": 978911,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937265385.826, "dur": 2.822, + "args": { + "External id": 978912,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937265547.225, "dur": 170.890, + "args": { + "External id": 978913,"Record function id": 0, "Sequence number": 10552311, "Fwd thread id": 1, "Ev Idx": 1504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937265548.923, "dur": 162.698, + "args": { + "External id": 978914,"Sequence number": 10552311, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1505 + } + }, + { + "ph": "f", "id": 155, "pid": 2338706, "tid": 2379422, "ts": 6345937265548.923, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345937265565.429, "dur": 41.132, + "args": { + "External id": 978915,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937265569.816, "dur": 2.624, + "args": { + "External id": 978916,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937265573.365, "dur": 32.658, + "args": { + "External id": 978917,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 1508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345937265613.545, "dur": 6.312, + "args": { + "External id": 978918,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937265614.673, "dur": 4.809, + "args": { + "External id": 978919,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937265724.448, "dur": 18.230, + "args": { + "External id": 978920,"Record function id": 0, "Sequence number": 10552310, "Fwd thread id": 1, "Ev Idx": 1511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937265726.163, "dur": 13.307, + "args": { + "External id": 978921,"Sequence number": 10552310, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1512 + } + }, + { + "ph": "f", "id": 156, "pid": 2338706, "tid": 2379422, "ts": 6345937265726.163, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937265728.647, "dur": 10.540, + "args": { + "External id": 978922,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937265732.371, "dur": 6.650, + "args": { + "External id": 978923,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937265748.501, "dur": 7.650, + "args": { + "External id": 978924,"Record function id": 0, "Sequence number": 10552309, "Fwd thread id": 1, "Ev Idx": 1515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937265749.538, "dur": 3.885, + "args": { + "External id": 978925,"Sequence number": 10552309, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1516 + } + }, + { + "ph": "f", "id": 157, "pid": 2338706, "tid": 2379422, "ts": 6345937265749.538, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937265751.351, "dur": 1.925, + "args": { + "External id": 978926,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937265752.067, "dur": 1.056, + "args": { + "External id": 978927,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937265759.415, "dur": 8.966, + "args": { + "External id": 978928,"Record function id": 0, "Sequence number": 10552308, "Fwd thread id": 1, "Ev Idx": 1519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937265760.516, "dur": 5.222, + "args": { + "External id": 978929,"Sequence number": 10552308, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1520 + } + }, + { + "ph": "f", "id": 158, "pid": 2338706, "tid": 2379422, "ts": 6345937265760.516, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937265761.833, "dur": 3.764, + "args": { + "External id": 978930,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937265764.704, "dur": 0.803, + "args": { + "External id": 978931,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937265771.608, "dur": 8.837, + "args": { + "External id": 978932,"Record function id": 0, "Sequence number": 10552307, "Fwd thread id": 1, "Ev Idx": 1523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937265772.503, "dur": 4.860, + "args": { + "External id": 978933,"Sequence number": 10552307, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1524 + } + }, + { + "ph": "f", "id": 159, "pid": 2338706, "tid": 2379422, "ts": 6345937265772.503, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937265773.822, "dur": 3.396, + "args": { + "External id": 978934,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937265776.235, "dur": 0.843, + "args": { + "External id": 978935,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937265783.716, "dur": 324.971, + "args": { + "External id": 978936,"Record function id": 0, "Sequence number": 10552306, "Fwd thread id": 1, "Ev Idx": 1527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937265784.666, "dur": 310.504, + "args": { + "External id": 978937,"Sequence number": 10552306, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1528 + } + }, + { + "ph": "f", "id": 160, "pid": 2338706, "tid": 2379422, "ts": 6345937265784.666, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937265789.656, "dur": 6.152, + "args": { + "External id": 978938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937265791.569, "dur": 3.514, + "args": { + "External id": 978939,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937265793.398, "dur": 1.383, + "args": { + "External id": 978940,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937265797.844, "dur": 72.228, + "args": { + "External id": 978941,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937265873.381, "dur": 6.498, + "args": { + "External id": 978942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937265874.220, "dur": 4.810, + "args": { + "External id": 978943,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937265877.786, "dur": 1.089, + "args": { + "External id": 978944,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937265972.619, "dur": 3.778, + "args": { + "External id": 978945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937265974.479, "dur": 1.217, + "args": { + "External id": 978946,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937265975.118, "dur": 0.441, + "args": { + "External id": 978947,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937265979.677, "dur": 113.144, + "args": { + "External id": 978948,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266119.771, "dur": 12.472, + "args": { + "External id": 978949,"Record function id": 0, "Sequence number": 10552305, "Fwd thread id": 1, "Ev Idx": 1540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266121.315, "dur": 8.573, + "args": { + "External id": 978950,"Sequence number": 10552305, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1541 + } + }, + { + "ph": "f", "id": 161, "pid": 2338706, "tid": 2379422, "ts": 6345937266121.315, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937266123.566, "dur": 6.122, + "args": { + "External id": 978951,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937266127.108, "dur": 2.452, + "args": { + "External id": 978952,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266135.906, "dur": 9.454, + "args": { + "External id": 978953,"Record function id": 0, "Sequence number": 10552304, "Fwd thread id": 1, "Ev Idx": 1544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266136.787, "dur": 6.616, + "args": { + "External id": 978954,"Sequence number": 10552304, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1545 + } + }, + { + "ph": "f", "id": 162, "pid": 2338706, "tid": 2379422, "ts": 6345937266136.787, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937266137.963, "dur": 5.235, + "args": { + "External id": 978955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937266138.738, "dur": 3.919, + "args": { + "External id": 978956,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937266141.850, "dur": 0.648, + "args": { + "External id": 978957,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937266151.283, "dur": 9.884, + "args": { + "External id": 978958,"Record function id": 0, "Ev Idx": 1549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937266152.664, "dur": 7.572, + "args": { + "External id": 978959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937266155.428, "dur": 4.276, + "args": { + "External id": 978960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937266156.638, "dur": 2.947, + "args": { + "External id": 978961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266164.398, "dur": 7.634, + "args": { + "External id": 978962,"Record function id": 0, "Sequence number": 10552303, "Fwd thread id": 1, "Ev Idx": 1553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266165.487, "dur": 4.645, + "args": { + "External id": 978963,"Sequence number": 10552303, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1554 + } + }, + { + "ph": "f", "id": 163, "pid": 2338706, "tid": 2379422, "ts": 6345937266165.487, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937266168.356, "dur": 1.612, + "args": { + "External id": 978964,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937266168.765, "dur": 1.049, + "args": { + "External id": 978965,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266175.330, "dur": 119.850, + "args": { + "External id": 978966,"Record function id": 0, "Sequence number": 10552302, "Fwd thread id": 1, "Ev Idx": 1557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266176.196, "dur": 110.342, + "args": { + "External id": 978967,"Sequence number": 10552302, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1558 + } + }, + { + "ph": "f", "id": 164, "pid": 2338706, "tid": 2379422, "ts": 6345937266176.196, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937266181.229, "dur": 11.487, + "args": { + "External id": 978968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937266181.634, "dur": 10.580, + "args": { + "External id": 978969,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937266191.163, "dur": 0.763, + "args": { + "External id": 978970,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937266193.626, "dur": 38.713, + "args": { + "External id": 978971,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937266233.394, "dur": 6.931, + "args": { + "External id": 978972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937266234.047, "dur": 5.586, + "args": { + "External id": 978973,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937266237.377, "dur": 2.074, + "args": { + "External id": 978974,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937266241.869, "dur": 7.222, + "args": { + "External id": 978975,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937266243.345, "dur": 5.366, + "args": { + "External id": 978976,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937266246.181, "dur": 2.462, + "args": { + "External id": 978977,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937266249.643, "dur": 36.177, + "args": { + "External id": 978978,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266302.478, "dur": 32.969, + "args": { + "External id": 978979,"Record function id": 0, "Sequence number": 10552301, "Fwd thread id": 1, "Ev Idx": 1570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266303.491, "dur": 3.362, + "args": { + "External id": 978980,"Sequence number": 10552301, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1571 + } + }, + { + "ph": "f", "id": 165, "pid": 2338706, "tid": 2379422, "ts": 6345937266303.491, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937266304.684, "dur": 2.023, + "args": { + "External id": 978981,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937266305.364, "dur": 1.179, + "args": { + "External id": 978982,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345937266310.570, "dur": 22.084, + "args": { + "External id": 978983,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266341.298, "dur": 12.558, + "args": { + "External id": 978984,"Record function id": 0, "Sequence number": 10552300, "Fwd thread id": 1, "Ev Idx": 1575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266342.207, "dur": 8.744, + "args": { + "External id": 978985,"Sequence number": 10552300, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1576 + } + }, + { + "ph": "f", "id": 166, "pid": 2338706, "tid": 2379422, "ts": 6345937266342.207, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937266343.180, "dur": 7.551, + "args": { + "External id": 978986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937266346.018, "dur": 4.153, + "args": { + "External id": 978987,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937266349.504, "dur": 0.538, + "args": { + "External id": 978988,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937266357.771, "dur": 5.169, + "args": { + "External id": 978989,"Record function id": 0, "Ev Idx": 1580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937266359.000, "dur": 3.476, + "args": { + "External id": 978990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937266360.121, "dur": 1.860, + "args": { + "External id": 978991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937266360.529, "dur": 1.355, + "args": { + "External id": 978992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266366.080, "dur": 6.399, + "args": { + "External id": 978993,"Record function id": 0, "Sequence number": 10552299, "Fwd thread id": 1, "Ev Idx": 1584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266367.526, "dur": 2.827, + "args": { + "External id": 978994,"Sequence number": 10552299, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1585 + } + }, + { + "ph": "f", "id": 167, "pid": 2338706, "tid": 2379422, "ts": 6345937266367.526, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937266368.644, "dur": 1.557, + "args": { + "External id": 978995,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937266369.050, "dur": 1.060, + "args": { + "External id": 978996,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266375.460, "dur": 100.294, + "args": { + "External id": 978997,"Record function id": 0, "Sequence number": 10552298, "Fwd thread id": 1, "Ev Idx": 1588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266378.439, "dur": 89.168, + "args": { + "External id": 978998,"Sequence number": 10552298, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1589 + } + }, + { + "ph": "f", "id": 168, "pid": 2338706, "tid": 2379422, "ts": 6345937266378.439, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937266381.909, "dur": 2.161, + "args": { + "External id": 978999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937266382.505, "dur": 1.144, + "args": { + "External id": 979000,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937266383.131, "dur": 0.407, + "args": { + "External id": 979001,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937266384.838, "dur": 37.812, + "args": { + "External id": 979002,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937266423.594, "dur": 6.682, + "args": { + "External id": 979003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937266424.085, "dur": 5.617, + "args": { + "External id": 979004,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937266428.858, "dur": 0.731, + "args": { + "External id": 979005,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937266431.699, "dur": 2.483, + "args": { + "External id": 979006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937266432.483, "dur": 1.168, + "args": { + "External id": 979007,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937266433.118, "dur": 0.464, + "args": { + "External id": 979008,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937266434.808, "dur": 32.139, + "args": { + "External id": 979009,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266480.394, "dur": 29.126, + "args": { + "External id": 979010,"Record function id": 0, "Sequence number": 10552297, "Fwd thread id": 1, "Ev Idx": 1601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266481.292, "dur": 8.278, + "args": { + "External id": 979011,"Sequence number": 10552297, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1602 + } + }, + { + "ph": "f", "id": 169, "pid": 2338706, "tid": 2379422, "ts": 6345937266481.292, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937266485.307, "dur": 4.110, + "args": { + "External id": 979012,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937266488.071, "dur": 1.225, + "args": { + "External id": 979013,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345937266492.059, "dur": 14.746, + "args": { + "External id": 979014,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266513.040, "dur": 6.956, + "args": { + "External id": 979015,"Record function id": 0, "Sequence number": 10552296, "Fwd thread id": 1, "Ev Idx": 1606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345937266514.139, "dur": 3.838, + "args": { + "External id": 979016,"Sequence number": 10552296, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1607 + } + }, + { + "ph": "f", "id": 170, "pid": 2338706, "tid": 2379422, "ts": 6345937266514.139, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345937266515.374, "dur": 2.415, + "args": { + "External id": 979017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345937266515.808, "dur": 1.501, + "args": { + "External id": 979018,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937266516.607, "dur": 0.589, + "args": { + "External id": 979019,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937266523.798, "dur": 4.420, + "args": { + "External id": 979020,"Record function id": 0, "Ev Idx": 1611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937266524.972, "dur": 2.668, + "args": { + "External id": 979021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937266525.976, "dur": 1.380, + "args": { + "External id": 979022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937266526.425, "dur": 0.840, + "args": { + "External id": 979023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937266532.635, "dur": 376.579, + "args": { + "External id": 979024,"Record function id": 0, "Sequence number": 10552295, "Fwd thread id": 1, "Ev Idx": 1615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937266533.991, "dur": 343.541, + "args": { + "External id": 979025,"Sequence number": 10552295, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1616 + } + }, + { + "ph": "f", "id": 171, "pid": 2338706, "tid": 2379422, "ts": 6345937266533.991, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937266581.887, "dur": 1.582, + "args": { + "External id": 979026,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937266582.292, "dur": 0.978, + "args": { + "External id": 979027,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937266599.116, "dur": 9.949, + "args": { + "External id": 979028,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937266618.891, "dur": 2.492, + "args": { + "External id": 979029,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937266772.771, "dur": 1.597, + "args": { + "External id": 979030,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937266778.338, "dur": 36.105, + "args": { + "External id": 979031,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937266791.512, "dur": 0.929, + "args": { + "External id": 979032,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937266819.830, "dur": 31.988, + "args": { + "External id": 979033,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937266824.012, "dur": 27.601, + "args": { + "External id": 979034,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937266827.807, "dur": 4.026, + "args": { + "External id": 979035,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937266833.400, "dur": 17.603, + "args": { + "External id": 979036,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345937266855.846, "dur": 2.188, + "args": { + "External id": 979037,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937266856.933, "dur": 0.997, + "args": { + "External id": 979038,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937266866.453, "dur": 4.163, + "args": { + "External id": 979039,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937266869.476, "dur": 1.003, + "args": { + "External id": 979040,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345937266890.030, "dur": 14.864, + "args": { + "External id": 979041,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937266918.002, "dur": 7.589, + "args": { + "External id": 979042,"Record function id": 0, "Ev Idx": 1633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937266920.386, "dur": 4.626, + "args": { + "External id": 979043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937266921.989, "dur": 2.172, + "args": { + "External id": 979044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937266922.825, "dur": 1.241, + "args": { + "External id": 979045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937266930.292, "dur": 2953.855, + "args": { + "External id": 979046,"Record function id": 0, "Ev Idx": 1637 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.25)", "pid": 2338706, "tid": 2379422, + "ts": 6345937266962.712, "dur": 1085.831, + "args": { + "External id": 979047,"Record function id": 0, "Ev Idx": 1638 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.24", "pid": 2338706, "tid": 2379422, + "ts": 6345937266989.940, "dur": 1048.854, + "args": { + "External id": 979048,"Record function id": 0, "Ev Idx": 1639 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338706, "tid": 2379422, + "ts": 6345937267006.162, "dur": 994.511, + "args": { + "External id": 979049,"Record function id": 0, "Ev Idx": 1640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937267164.939, "dur": 8.835, + "args": { + "External id": 979050,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345937267193.060, "dur": 40.700, + "args": { + "External id": 979051,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267200.804, "dur": 1.399, + "args": { + "External id": 979052,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267204.009, "dur": 2.897, + "args": { + "External id": 979053,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267209.310, "dur": 0.347, + "args": { + "External id": 979054,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267212.628, "dur": 0.574, + "args": { + "External id": 979055,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267213.703, "dur": 0.268, + "args": { + "External id": 979056,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267218.943, "dur": 0.246, + "args": { + "External id": 979057,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267219.927, "dur": 0.498, + "args": { + "External id": 979058,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267221.429, "dur": 2.240, + "args": { + "External id": 979059,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267226.794, "dur": 0.247, + "args": { + "External id": 979060,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937267246.986, "dur": 52.650, + "args": { + "External id": 979061,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345937267345.105, "dur": 133.761, + "args": { + "External id": 979062,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 1653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937267357.101, "dur": 6.240, + "args": { + "External id": 979063,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345937267372.529, "dur": 12.425, + "args": { + "External id": 979064,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937267376.807, "dur": 7.652, + "args": { + "External id": 979065,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 1656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267382.239, "dur": 0.652, + "args": { + "External id": 979066,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345937267391.892, "dur": 31.368, + "args": { + "External id": 979067,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267393.521, "dur": 0.670, + "args": { + "External id": 979068,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267397.161, "dur": 2.259, + "args": { + "External id": 979069,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267400.408, "dur": 0.382, + "args": { + "External id": 979070,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267403.441, "dur": 0.372, + "args": { + "External id": 979071,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267406.196, "dur": 0.374, + "args": { + "External id": 979072,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267407.533, "dur": 0.238, + "args": { + "External id": 979073,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267410.094, "dur": 2.704, + "args": { + "External id": 979074,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267415.612, "dur": 0.614, + "args": { + "External id": 979075,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937267416.916, "dur": 0.473, + "args": { + "External id": 979076,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937267435.852, "dur": 33.576, + "args": { + "External id": 979077,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345937267543.270, "dur": 354.582, + "args": { + "External id": 979078,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 1669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937267580.167, "dur": 312.754, + "args": { + "External id": 979079,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1670, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345937267590.496, "dur": 296.830, + "args": { + "External id": 979080,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 1671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937267919.499, "dur": 2.273, + "args": { + "External id": 979081,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1672, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937268089.872, "dur": 1767.871, + "args": { + "External id": 979082,"Sequence number": 10552294, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1673 + } + }, + { + "ph": "f", "id": 172, "pid": 2338706, "tid": 2379422, "ts": 6345937268089.872, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937268221.539, "dur": 114.028, + "args": { + "External id": 979083,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 1674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937268384.089, "dur": 41.216, + "args": { + "External id": 979084,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 1675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345937268448.099, "dur": 48.013, + "args": { + "External id": 979085,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 1676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937268506.441, "dur": 30.343, + "args": { + "External id": 979086,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937268542.982, "dur": 32.820, + "args": { + "External id": 979087,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937268584.867, "dur": 27.844, + "args": { + "External id": 979088,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937268620.407, "dur": 28.144, + "args": { + "External id": 979089,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937268676.293, "dur": 23.984, + "args": { + "External id": 979090,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 1681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937268722.288, "dur": 31.668, + "args": { + "External id": 979091,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937268782.286, "dur": 19.613, + "args": { + "External id": 979092,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937268818.365, "dur": 14.774, + "args": { + "External id": 979093,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937268845.268, "dur": 35.116, + "args": { + "External id": 979094,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937268883.859, "dur": 31.700, + "args": { + "External id": 979095,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937268947.300, "dur": 331.939, + "args": { + "External id": 979096,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937269045.329, "dur": 55.955, + "args": { + "External id": 979097,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937269104.629, "dur": 6.219, + "args": { + "External id": 979098,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937269112.192, "dur": 2.118, + "args": { + "External id": 979099,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937269115.711, "dur": 1.606, + "args": { + "External id": 979100,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937269164.198, "dur": 6.508, + "args": { + "External id": 979101,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937269167.669, "dur": 2.843, + "args": { + "External id": 979102,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937269172.586, "dur": 33.174, + "args": { + "External id": 979103,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937269178.186, "dur": 1.747, + "args": { + "External id": 979104,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937269206.883, "dur": 3.523, + "args": { + "External id": 979105,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937269209.858, "dur": 0.455, + "args": { + "External id": 979106,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937269211.315, "dur": 19.138, + "args": { + "External id": 979107,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937269214.386, "dur": 2.438, + "args": { + "External id": 979108,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937269324.732, "dur": 30.019, + "args": { + "External id": 979109,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937269373.313, "dur": 16.347, + "args": { + "External id": 979110,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937269397.177, "dur": 47.143, + "args": { + "External id": 979111,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937269453.447, "dur": 38.587, + "args": { + "External id": 979112,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937269500.427, "dur": 21.240, + "args": { + "External id": 979113,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937269527.142, "dur": 31.053, + "args": { + "External id": 979114,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937269565.135, "dur": 27.054, + "args": { + "External id": 979115,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937269601.568, "dur": 30.331, + "args": { + "External id": 979116,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345937269654.707, "dur": 24.040, + "args": { + "External id": 979117,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 1708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937269697.953, "dur": 25.216, + "args": { + "External id": 979118,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937269742.341, "dur": 16.514, + "args": { + "External id": 979119,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937269777.325, "dur": 13.405, + "args": { + "External id": 979120,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345937269808.396, "dur": 19.023, + "args": { + "External id": 979121,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 1712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937269906.655, "dur": 17.336, + "args": { + "External id": 979122,"Record function id": 0, "Ev Idx": 1713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937269910.513, "dur": 12.636, + "args": { + "External id": 979123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937269914.819, "dur": 7.303, + "args": { + "External id": 979124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937269916.208, "dur": 5.801, + "args": { + "External id": 979125,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937269927.733, "dur": 4.751, + "args": { + "External id": 979126,"Record function id": 0, "Ev Idx": 1717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937269929.353, "dur": 2.620, + "args": { + "External id": 979127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937269930.118, "dur": 1.388, + "args": { + "External id": 979128,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937269930.637, "dur": 0.803, + "args": { + "External id": 979129,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937269935.991, "dur": 4.631, + "args": { + "External id": 979130,"Record function id": 0, "Ev Idx": 1721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937269937.320, "dur": 2.863, + "args": { + "External id": 979131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937269938.035, "dur": 1.659, + "args": { + "External id": 979132,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937269938.797, "dur": 0.785, + "args": { + "External id": 979133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937269944.180, "dur": 4.177, + "args": { + "External id": 979134,"Record function id": 0, "Ev Idx": 1725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937269945.215, "dur": 2.694, + "args": { + "External id": 979135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937269945.999, "dur": 1.365, + "args": { + "External id": 979136,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937269946.658, "dur": 0.615, + "args": { + "External id": 979137,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937269951.506, "dur": 26.316, + "args": { + "External id": 979138,"Record function id": 0, "Ev Idx": 1729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937269952.953, "dur": 24.265, + "args": { + "External id": 979139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937269974.938, "dur": 1.433, + "args": { + "External id": 979140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937269975.488, "dur": 0.771, + "args": { + "External id": 979141,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937269981.284, "dur": 8.648, + "args": { + "External id": 979142,"Record function id": 0, "Ev Idx": 1733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937269984.291, "dur": 5.179, + "args": { + "External id": 979143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937269985.069, "dur": 3.893, + "args": { + "External id": 979144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937269988.252, "dur": 0.601, + "args": { + "External id": 979145,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937269995.180, "dur": 4.349, + "args": { + "External id": 979146,"Record function id": 0, "Ev Idx": 1737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937269996.378, "dur": 2.748, + "args": { + "External id": 979147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937269997.080, "dur": 1.315, + "args": { + "External id": 979148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937269997.586, "dur": 0.747, + "args": { + "External id": 979149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937270002.797, "dur": 26.348, + "args": { + "External id": 979150,"Record function id": 0, "Ev Idx": 1741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937270004.029, "dur": 23.982, + "args": { + "External id": 979151,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937270004.840, "dur": 22.200, + "args": { + "External id": 979152,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937270005.100, "dur": 21.358, + "args": { + "External id": 979153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937270034.790, "dur": 6.086, + "args": { + "External id": 979154,"Record function id": 0, "Ev Idx": 1745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937270036.763, "dur": 3.697, + "args": { + "External id": 979155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937270037.925, "dur": 1.637, + "args": { + "External id": 979156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937270038.397, "dur": 1.096, + "args": { + "External id": 979157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937270045.316, "dur": 281002.980, + "args": { + "External id": 979158,"Record function id": 0, "Sequence number": 10552293, "Fwd thread id": 1, "Ev Idx": 1749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937270047.131, "dur": 280990.180, + "args": { + "External id": 979159,"Sequence number": 10552293, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1750 + } + }, + { + "ph": "f", "id": 173, "pid": 2338706, "tid": 2379422, "ts": 6345937270047.131, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.25)", "pid": 2338706, "tid": 2379422, + "ts": 6345937270115.029, "dur": 45.527, + "args": { + "External id": 979160,"Record function id": 0, "Ev Idx": 1751 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.25)", "pid": 2338706, "tid": 2379422, + "ts": 6345937270170.430, "dur": 96.123, + "args": { + "External id": 979161,"Record function id": 0, "Ev Idx": 1752 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.25)", "pid": 2338706, "tid": 2379422, + "ts": 6345937270272.657, "dur": 280754.243, + "args": { + "External id": 979162,"Record function id": 0, "Ev Idx": 1753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937270331.693, "dur": 9.444, + "args": { + "External id": 979163,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937270351.587, "dur": 5.164, + "args": { + "External id": 979164,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345937270374.695, "dur": 279721.607, + "args": { + "External id": 979165,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345937270388.360, "dur": 279692.220, + "args": { + "External id": 979166,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937270530.415, "dur": 5.294, + "args": { + "External id": 979167,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937270561.716, "dur": 279431.963, + "args": { + "External id": 979168,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 1759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937270564.846, "dur": 279426.781, + "args": { + "External id": 979169,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 1760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937270568.771, "dur": 13.633, + "args": { + "External id": 979170,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937270584.693, "dur": 279401.004, + "args": { + "External id": 979171,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 1762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937550218.695, "dur": 18.310, + "args": { + "External id": 979172,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 1763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937550226.382, "dur": 10.208, + "args": { + "External id": 979173,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345937550278.031, "dur": 398.196, + "args": { + "External id": 979174,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 1765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937550315.131, "dur": 356.247, + "args": { + "External id": 979175,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1766, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345937550327.377, "dur": 338.142, + "args": { + "External id": 979176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 1767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937550697.409, "dur": 2.383, + "args": { + "External id": 979177,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1768, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937550760.436, "dur": 7.236, + "args": { + "External id": 979178,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937550832.565, "dur": 1.280, + "args": { + "External id": 979179,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937550852.220, "dur": 1.145, + "args": { + "External id": 979180,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937550868.287, "dur": 0.823, + "args": { + "External id": 979181,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937550882.360, "dur": 0.916, + "args": { + "External id": 979182,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937550897.761, "dur": 0.964, + "args": { + "External id": 979183,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937550909.165, "dur": 1.305, + "args": { + "External id": 979184,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937550924.874, "dur": 2.740, + "args": { + "External id": 979185,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937550939.880, "dur": 0.787, + "args": { + "External id": 979186,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937551098.260, "dur": 3116.500, + "args": { + "External id": 979187,"Record function id": 0, "Ev Idx": 1778 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.24)", "pid": 2338706, "tid": 2379422, + "ts": 6345937551123.443, "dur": 1205.613, + "args": { + "External id": 979188,"Record function id": 0, "Ev Idx": 1779 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338706, "tid": 2379422, + "ts": 6345937551140.547, "dur": 374.816, + "args": { + "External id": 979189,"Record function id": 0, "Ev Idx": 1780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937551246.340, "dur": 5.583, + "args": { + "External id": 979190,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937551255.108, "dur": 0.879, + "args": { + "External id": 979191,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937551257.921, "dur": 0.988, + "args": { + "External id": 979192,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937551260.577, "dur": 0.812, + "args": { + "External id": 979193,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937551262.684, "dur": 0.795, + "args": { + "External id": 979194,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937551267.146, "dur": 1.212, + "args": { + "External id": 979195,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937551271.826, "dur": 3.273, + "args": { + "External id": 979196,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937551276.985, "dur": 3.303, + "args": { + "External id": 979197,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937551281.547, "dur": 1.187, + "args": { + "External id": 979198,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937551286.146, "dur": 1.063, + "args": { + "External id": 979199,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937551308.181, "dur": 173.802, + "args": { + "External id": 979200,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937551328.856, "dur": 148.656, + "args": { + "External id": 979201,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937551349.473, "dur": 15.295, + "args": { + "External id": 979202,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937551370.444, "dur": 74.305, + "args": { + "External id": 979203,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 1794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937551373.218, "dur": 71.234, + "args": { + "External id": 979204,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 1795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551379.395, "dur": 8.404, + "args": { + "External id": 979205,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937551389.628, "dur": 54.295, + "args": { + "External id": 979206,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 1797 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.23", "pid": 2338706, "tid": 2379422, + "ts": 6345937551610.972, "dur": 709.401, + "args": { + "External id": 979207,"Record function id": 0, "Ev Idx": 1798 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338706, "tid": 2379422, + "ts": 6345937551628.575, "dur": 677.889, + "args": { + "External id": 979208,"Record function id": 0, "Ev Idx": 1799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937551691.806, "dur": 5.039, + "args": { + "External id": 979209,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345937551712.828, "dur": 37.234, + "args": { + "External id": 979210,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551719.073, "dur": 1.709, + "args": { + "External id": 979211,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551722.145, "dur": 2.514, + "args": { + "External id": 979212,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551725.621, "dur": 2.728, + "args": { + "External id": 979213,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551730.801, "dur": 0.509, + "args": { + "External id": 979214,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551733.649, "dur": 0.735, + "args": { + "External id": 979215,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551736.830, "dur": 0.350, + "args": { + "External id": 979216,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551737.807, "dur": 0.279, + "args": { + "External id": 979217,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551740.429, "dur": 0.367, + "args": { + "External id": 979218,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551742.922, "dur": 0.269, + "args": { + "External id": 979219,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937551759.905, "dur": 43.518, + "args": { + "External id": 979220,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345937551834.178, "dur": 149.781, + "args": { + "External id": 979221,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 1812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937551844.447, "dur": 3.342, + "args": { + "External id": 979222,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345937551852.834, "dur": 39.686, + "args": { + "External id": 979223,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937551883.773, "dur": 8.303, + "args": { + "External id": 979224,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 1815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551887.778, "dur": 2.758, + "args": { + "External id": 979225,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345937551900.226, "dur": 31.710, + "args": { + "External id": 979226,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551902.445, "dur": 0.725, + "args": { + "External id": 979227,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551905.843, "dur": 0.482, + "args": { + "External id": 979228,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551907.844, "dur": 0.457, + "args": { + "External id": 979229,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551909.599, "dur": 1.566, + "args": { + "External id": 979230,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551912.696, "dur": 0.307, + "args": { + "External id": 979231,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551914.526, "dur": 0.742, + "args": { + "External id": 979232,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551919.514, "dur": 0.334, + "args": { + "External id": 979233,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551920.985, "dur": 2.003, + "args": { + "External id": 979234,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937551924.604, "dur": 0.612, + "args": { + "External id": 979235,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937551944.922, "dur": 30.466, + "args": { + "External id": 979236,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345937552084.455, "dur": 139.959, + "args": { + "External id": 979237,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 1828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937552121.096, "dur": 99.335, + "args": { + "External id": 979238,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1829, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345937552132.180, "dur": 83.961, + "args": { + "External id": 979239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 1830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937552243.630, "dur": 1.787, + "args": { + "External id": 979240,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1831, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937552336.453, "dur": 1851.107, + "args": { + "External id": 979241,"Sequence number": 10552292, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1832 + } + }, + { + "ph": "f", "id": 174, "pid": 2338706, "tid": 2379422, "ts": 6345937552336.453, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937552456.158, "dur": 105.961, + "args": { + "External id": 979242,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 1833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937552606.952, "dur": 45.421, + "args": { + "External id": 979243,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 1834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345937552673.170, "dur": 51.489, + "args": { + "External id": 979244,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 1835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937552735.502, "dur": 31.146, + "args": { + "External id": 979245,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937552775.830, "dur": 31.269, + "args": { + "External id": 979246,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937552815.929, "dur": 26.917, + "args": { + "External id": 979247,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937552852.175, "dur": 28.826, + "args": { + "External id": 979248,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937552910.085, "dur": 24.734, + "args": { + "External id": 979249,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 1840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937552957.006, "dur": 29.480, + "args": { + "External id": 979250,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937553032.239, "dur": 58.881, + "args": { + "External id": 979251,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937553114.836, "dur": 19.730, + "args": { + "External id": 979252,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937553145.987, "dur": 43.187, + "args": { + "External id": 979253,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937553193.274, "dur": 32.902, + "args": { + "External id": 979254,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937553257.960, "dur": 265.395, + "args": { + "External id": 979255,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937553348.628, "dur": 6.816, + "args": { + "External id": 979256,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937553357.617, "dur": 2.180, + "args": { + "External id": 979257,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937553361.155, "dur": 4.248, + "args": { + "External id": 979258,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937553366.603, "dur": 3.086, + "args": { + "External id": 979259,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937553415.028, "dur": 5.353, + "args": { + "External id": 979260,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937553417.144, "dur": 3.032, + "args": { + "External id": 979261,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937553422.889, "dur": 35.033, + "args": { + "External id": 979262,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937553428.600, "dur": 3.501, + "args": { + "External id": 979263,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937553459.676, "dur": 2.144, + "args": { + "External id": 979264,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937553461.066, "dur": 0.655, + "args": { + "External id": 979265,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937553462.930, "dur": 17.211, + "args": { + "External id": 979266,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937553466.670, "dur": 0.631, + "args": { + "External id": 979267,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937553566.786, "dur": 30.347, + "args": { + "External id": 979268,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937553615.270, "dur": 16.414, + "args": { + "External id": 979269,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937553639.226, "dur": 50.620, + "args": { + "External id": 979270,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937553703.607, "dur": 44.122, + "args": { + "External id": 979271,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937553758.936, "dur": 19.823, + "args": { + "External id": 979272,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937553784.228, "dur": 32.984, + "args": { + "External id": 979273,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937553824.395, "dur": 26.367, + "args": { + "External id": 979274,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937553857.717, "dur": 31.939, + "args": { + "External id": 979275,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345937553913.848, "dur": 27.361, + "args": { + "External id": 979276,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 1867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937553960.494, "dur": 26.841, + "args": { + "External id": 979277,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937554003.170, "dur": 39.554, + "args": { + "External id": 979278,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937554097.117, "dur": 19.773, + "args": { + "External id": 979279,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345937554135.503, "dur": 20.086, + "args": { + "External id": 979280,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 1871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554238.805, "dur": 16.954, + "args": { + "External id": 979281,"Record function id": 0, "Ev Idx": 1872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554242.710, "dur": 12.007, + "args": { + "External id": 979282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554247.276, "dur": 6.426, + "args": { + "External id": 979283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554249.149, "dur": 4.467, + "args": { + "External id": 979284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554259.688, "dur": 7.309, + "args": { + "External id": 979285,"Record function id": 0, "Ev Idx": 1876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554261.253, "dur": 5.295, + "args": { + "External id": 979286,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554262.223, "dur": 3.807, + "args": { + "External id": 979287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554262.998, "dur": 2.911, + "args": { + "External id": 979288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554270.413, "dur": 4.491, + "args": { + "External id": 979289,"Record function id": 0, "Ev Idx": 1880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554271.762, "dur": 2.715, + "args": { + "External id": 979290,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554272.250, "dur": 1.783, + "args": { + "External id": 979291,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554272.800, "dur": 1.156, + "args": { + "External id": 979292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554278.157, "dur": 3.793, + "args": { + "External id": 979293,"Record function id": 0, "Ev Idx": 1884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554279.544, "dur": 1.998, + "args": { + "External id": 979294,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554280.019, "dur": 0.983, + "args": { + "External id": 979295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554280.284, "dur": 0.641, + "args": { + "External id": 979296,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554285.232, "dur": 4.180, + "args": { + "External id": 979297,"Record function id": 0, "Ev Idx": 1888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554286.584, "dur": 2.408, + "args": { + "External id": 979298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554287.440, "dur": 0.937, + "args": { + "External id": 979299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554287.700, "dur": 0.604, + "args": { + "External id": 979300,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554292.872, "dur": 3.888, + "args": { + "External id": 979301,"Record function id": 0, "Ev Idx": 1892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554294.170, "dur": 2.173, + "args": { + "External id": 979302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554294.687, "dur": 1.237, + "args": { + "External id": 979303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554295.040, "dur": 0.817, + "args": { + "External id": 979304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554300.086, "dur": 26.429, + "args": { + "External id": 979305,"Record function id": 0, "Ev Idx": 1896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554301.690, "dur": 24.306, + "args": { + "External id": 979306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554302.309, "dur": 23.132, + "args": { + "External id": 979307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554324.151, "dur": 1.159, + "args": { + "External id": 979308,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554329.997, "dur": 4.355, + "args": { + "External id": 979309,"Record function id": 0, "Ev Idx": 1900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554331.447, "dur": 2.463, + "args": { + "External id": 979310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554332.118, "dur": 1.353, + "args": { + "External id": 979311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554332.449, "dur": 0.959, + "args": { + "External id": 979312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554337.477, "dur": 6.018, + "args": { + "External id": 979313,"Record function id": 0, "Ev Idx": 1904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937554338.799, "dur": 4.273, + "args": { + "External id": 979314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554339.284, "dur": 3.073, + "args": { + "External id": 979315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937554339.770, "dur": 2.527, + "args": { + "External id": 979316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937554347.627, "dur": 80631.313, + "args": { + "External id": 979317,"Record function id": 0, "Sequence number": 10552291, "Fwd thread id": 1, "Ev Idx": 1908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937554349.081, "dur": 80620.476, + "args": { + "External id": 979318,"Sequence number": 10552291, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1909 + } + }, + { + "ph": "f", "id": 175, "pid": 2338706, "tid": 2379422, "ts": 6345937554349.081, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.24)", "pid": 2338706, "tid": 2379422, + "ts": 6345937554384.556, "dur": 43.775, + "args": { + "External id": 979319,"Record function id": 0, "Ev Idx": 1910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.24)", "pid": 2338706, "tid": 2379422, + "ts": 6345937554436.590, "dur": 71.046, + "args": { + "External id": 979320,"Record function id": 0, "Ev Idx": 1911 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.24)", "pid": 2338706, "tid": 2379422, + "ts": 6345937554513.105, "dur": 80448.412, + "args": { + "External id": 979321,"Record function id": 0, "Ev Idx": 1912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937554607.979, "dur": 8.157, + "args": { + "External id": 979322,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937554626.805, "dur": 4.798, + "args": { + "External id": 979323,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345937554648.199, "dur": 79443.473, + "args": { + "External id": 979324,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345937554664.833, "dur": 79411.908, + "args": { + "External id": 979325,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937554791.628, "dur": 18.535, + "args": { + "External id": 979326,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937554831.538, "dur": 79163.862, + "args": { + "External id": 979327,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 1918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937554834.576, "dur": 79159.662, + "args": { + "External id": 979328,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 1919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937554839.782, "dur": 11.418, + "args": { + "External id": 979329,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937554853.239, "dur": 79135.567, + "args": { + "External id": 979330,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 1921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937634207.281, "dur": 12.538, + "args": { + "External id": 979331,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 1922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937634210.883, "dur": 8.582, + "args": { + "External id": 979332,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345937634250.910, "dur": 389.198, + "args": { + "External id": 979333,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 1924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937634285.941, "dur": 348.772, + "args": { + "External id": 979334,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1925, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345937634300.018, "dur": 329.244, + "args": { + "External id": 979335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 1926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937634665.091, "dur": 2.530, + "args": { + "External id": 979336,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1927, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937634728.992, "dur": 6.879, + "args": { + "External id": 979337,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937634786.238, "dur": 3.932, + "args": { + "External id": 979338,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937634807.984, "dur": 1.192, + "args": { + "External id": 979339,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937634822.136, "dur": 1.009, + "args": { + "External id": 979340,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937634837.284, "dur": 1.064, + "args": { + "External id": 979341,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937634849.920, "dur": 3.056, + "args": { + "External id": 979342,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937634865.210, "dur": 1.000, + "args": { + "External id": 979343,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937634879.132, "dur": 3.564, + "args": { + "External id": 979344,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937634896.552, "dur": 0.927, + "args": { + "External id": 979345,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937634994.431, "dur": 3141.769, + "args": { + "External id": 979346,"Record function id": 0, "Ev Idx": 1937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.23)", "pid": 2338706, "tid": 2379422, + "ts": 6345937635037.816, "dur": 1256.423, + "args": { + "External id": 979347,"Record function id": 0, "Ev Idx": 1938 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338706, "tid": 2379422, + "ts": 6345937635089.030, "dur": 419.843, + "args": { + "External id": 979348,"Record function id": 0, "Ev Idx": 1939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937635193.292, "dur": 7.218, + "args": { + "External id": 979349,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937635204.065, "dur": 1.139, + "args": { + "External id": 979350,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937635207.323, "dur": 1.095, + "args": { + "External id": 979351,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937635210.341, "dur": 1.275, + "args": { + "External id": 979352,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937635213.097, "dur": 1.203, + "args": { + "External id": 979353,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937635215.708, "dur": 1.155, + "args": { + "External id": 979354,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937635218.399, "dur": 3.002, + "args": { + "External id": 979355,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937635224.619, "dur": 0.991, + "args": { + "External id": 979356,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937635227.311, "dur": 3.316, + "args": { + "External id": 979357,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937635232.117, "dur": 1.232, + "args": { + "External id": 979358,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937635298.428, "dur": 173.237, + "args": { + "External id": 979359,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937635317.364, "dur": 149.749, + "args": { + "External id": 979360,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937635340.732, "dur": 19.119, + "args": { + "External id": 979361,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937635364.569, "dur": 71.047, + "args": { + "External id": 979362,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 1953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937635367.098, "dur": 68.154, + "args": { + "External id": 979363,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 1954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635371.485, "dur": 7.340, + "args": { + "External id": 979364,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937635380.342, "dur": 54.292, + "args": { + "External id": 979365,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 1956 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.22", "pid": 2338706, "tid": 2379422, + "ts": 6345937635608.397, "dur": 677.160, + "args": { + "External id": 979366,"Record function id": 0, "Ev Idx": 1957 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338706, "tid": 2379422, + "ts": 6345937635626.945, "dur": 645.703, + "args": { + "External id": 979367,"Record function id": 0, "Ev Idx": 1958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937635686.263, "dur": 4.552, + "args": { + "External id": 979368,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345937635706.739, "dur": 41.441, + "args": { + "External id": 979369,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635712.303, "dur": 1.770, + "args": { + "External id": 979370,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635715.992, "dur": 2.000, + "args": { + "External id": 979371,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635719.506, "dur": 0.523, + "args": { + "External id": 979372,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635721.526, "dur": 2.766, + "args": { + "External id": 979373,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635731.092, "dur": 0.526, + "args": { + "External id": 979374,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635733.512, "dur": 0.354, + "args": { + "External id": 979375,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635735.199, "dur": 0.529, + "args": { + "External id": 979376,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635739.176, "dur": 0.427, + "args": { + "External id": 979377,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635741.239, "dur": 0.540, + "args": { + "External id": 979378,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937635759.199, "dur": 46.399, + "args": { + "External id": 979379,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345937635836.904, "dur": 120.456, + "args": { + "External id": 979380,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 1971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937635847.046, "dur": 3.283, + "args": { + "External id": 979381,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345937635855.754, "dur": 10.806, + "args": { + "External id": 979382,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937635860.517, "dur": 5.592, + "args": { + "External id": 979383,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 1974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635864.190, "dur": 0.548, + "args": { + "External id": 979384,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345937635873.276, "dur": 33.712, + "args": { + "External id": 979385,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635875.233, "dur": 2.735, + "args": { + "External id": 979386,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635880.699, "dur": 0.296, + "args": { + "External id": 979387,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635882.847, "dur": 0.485, + "args": { + "External id": 979388,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635884.654, "dur": 1.405, + "args": { + "External id": 979389,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635887.706, "dur": 0.478, + "args": { + "External id": 979390,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635889.478, "dur": 0.341, + "args": { + "External id": 979391,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635893.190, "dur": 0.381, + "args": { + "External id": 979392,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635895.273, "dur": 0.330, + "args": { + "External id": 979393,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937635897.093, "dur": 2.711, + "args": { + "External id": 979394,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937635917.025, "dur": 32.653, + "args": { + "External id": 979395,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345937636004.360, "dur": 187.495, + "args": { + "External id": 979396,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 1987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937636090.817, "dur": 96.789, + "args": { + "External id": 979397,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1988, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345937636102.449, "dur": 80.731, + "args": { + "External id": 979398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 1989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937636210.840, "dur": 1.847, + "args": { + "External id": 979399,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1990, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937636302.357, "dur": 1805.913, + "args": { + "External id": 979400,"Sequence number": 10552290, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1991 + } + }, + { + "ph": "f", "id": 176, "pid": 2338706, "tid": 2379422, "ts": 6345937636302.357, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937636418.853, "dur": 108.180, + "args": { + "External id": 979401,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 1992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937636571.087, "dur": 43.063, + "args": { + "External id": 979402,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 1993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345937636634.195, "dur": 49.709, + "args": { + "External id": 979403,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 1994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937636694.235, "dur": 31.122, + "args": { + "External id": 979404,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937636731.993, "dur": 32.400, + "args": { + "External id": 979405,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937636772.975, "dur": 26.689, + "args": { + "External id": 979406,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937636809.693, "dur": 27.700, + "args": { + "External id": 979407,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937636863.429, "dur": 23.686, + "args": { + "External id": 979408,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 1999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937636906.786, "dur": 28.975, + "args": { + "External id": 979409,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937636960.842, "dur": 21.501, + "args": { + "External id": 979410,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937636995.618, "dur": 38.232, + "args": { + "External id": 979411,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937637046.650, "dur": 78.083, + "args": { + "External id": 979412,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937637131.777, "dur": 35.277, + "args": { + "External id": 979413,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937637199.201, "dur": 261.688, + "args": { + "External id": 979414,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937637283.909, "dur": 7.561, + "args": { + "External id": 979415,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937637293.464, "dur": 2.593, + "args": { + "External id": 979416,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937637297.674, "dur": 1.417, + "args": { + "External id": 979417,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937637300.278, "dur": 3.487, + "args": { + "External id": 979418,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937637354.355, "dur": 5.403, + "args": { + "External id": 979419,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937637356.437, "dur": 3.080, + "args": { + "External id": 979420,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937637361.995, "dur": 34.802, + "args": { + "External id": 979421,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937637367.386, "dur": 3.665, + "args": { + "External id": 979422,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937637398.511, "dur": 1.406, + "args": { + "External id": 979423,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937637399.257, "dur": 0.555, + "args": { + "External id": 979424,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937637401.280, "dur": 15.705, + "args": { + "External id": 979425,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937637403.368, "dur": 0.427, + "args": { + "External id": 979426,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937637502.062, "dur": 29.448, + "args": { + "External id": 979427,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937637551.076, "dur": 18.347, + "args": { + "External id": 979428,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937637577.011, "dur": 54.914, + "args": { + "External id": 979429,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937637644.398, "dur": 43.145, + "args": { + "External id": 979430,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937637698.641, "dur": 21.836, + "args": { + "External id": 979431,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937637725.641, "dur": 33.518, + "args": { + "External id": 979432,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937637765.783, "dur": 25.877, + "args": { + "External id": 979433,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937637798.230, "dur": 33.822, + "args": { + "External id": 979434,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345937637854.940, "dur": 25.735, + "args": { + "External id": 979435,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937637897.773, "dur": 24.673, + "args": { + "External id": 979436,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937637940.726, "dur": 17.894, + "args": { + "External id": 979437,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937637976.768, "dur": 13.697, + "args": { + "External id": 979438,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345937638007.051, "dur": 36.486, + "args": { + "External id": 979439,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638160.409, "dur": 16.291, + "args": { + "External id": 979440,"Record function id": 0, "Ev Idx": 2031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638164.509, "dur": 11.030, + "args": { + "External id": 979441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638169.000, "dur": 5.614, + "args": { + "External id": 979442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638170.501, "dur": 4.023, + "args": { + "External id": 979443,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638180.496, "dur": 4.758, + "args": { + "External id": 979444,"Record function id": 0, "Ev Idx": 2035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638181.859, "dur": 2.941, + "args": { + "External id": 979445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638182.600, "dur": 1.757, + "args": { + "External id": 979446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638183.215, "dur": 1.028, + "args": { + "External id": 979447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638188.502, "dur": 7.575, + "args": { + "External id": 979448,"Record function id": 0, "Ev Idx": 2039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638190.044, "dur": 5.592, + "args": { + "External id": 979449,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638190.740, "dur": 4.460, + "args": { + "External id": 979450,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638191.931, "dur": 3.173, + "args": { + "External id": 979451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638199.262, "dur": 4.798, + "args": { + "External id": 979452,"Record function id": 0, "Ev Idx": 2043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638200.628, "dur": 3.012, + "args": { + "External id": 979453,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638201.613, "dur": 1.376, + "args": { + "External id": 979454,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638202.180, "dur": 0.731, + "args": { + "External id": 979455,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638207.073, "dur": 102.751, + "args": { + "External id": 979456,"Record function id": 0, "Ev Idx": 2047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638305.520, "dur": 3.476, + "args": { + "External id": 979457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638306.637, "dur": 1.456, + "args": { + "External id": 979458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638307.198, "dur": 0.793, + "args": { + "External id": 979459,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638313.450, "dur": 7.047, + "args": { + "External id": 979460,"Record function id": 0, "Ev Idx": 2051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638315.550, "dur": 4.477, + "args": { + "External id": 979461,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638316.155, "dur": 3.385, + "args": { + "External id": 979462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638318.880, "dur": 0.560, + "args": { + "External id": 979463,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638325.220, "dur": 4.287, + "args": { + "External id": 979464,"Record function id": 0, "Ev Idx": 2055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638326.615, "dur": 2.473, + "args": { + "External id": 979465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638327.275, "dur": 1.325, + "args": { + "External id": 979466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638327.660, "dur": 0.854, + "args": { + "External id": 979467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638332.814, "dur": 3.701, + "args": { + "External id": 979468,"Record function id": 0, "Ev Idx": 2059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638333.989, "dur": 2.122, + "args": { + "External id": 979469,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638334.629, "dur": 1.054, + "args": { + "External id": 979470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638334.907, "dur": 0.716, + "args": { + "External id": 979471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638339.620, "dur": 4.001, + "args": { + "External id": 979472,"Record function id": 0, "Ev Idx": 2063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937638340.907, "dur": 2.295, + "args": { + "External id": 979473,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638341.357, "dur": 1.298, + "args": { + "External id": 979474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937638341.780, "dur": 0.809, + "args": { + "External id": 979475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937638347.852, "dur": 73838.464, + "args": { + "External id": 979476,"Record function id": 0, "Sequence number": 10552289, "Fwd thread id": 1, "Ev Idx": 2067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937638356.378, "dur": 73821.016, + "args": { + "External id": 979477,"Sequence number": 10552289, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2068 + } + }, + { + "ph": "f", "id": 177, "pid": 2338706, "tid": 2379422, "ts": 6345937638356.378, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.23)", "pid": 2338706, "tid": 2379422, + "ts": 6345937638391.189, "dur": 42.877, + "args": { + "External id": 979478,"Record function id": 0, "Ev Idx": 2069 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.23)", "pid": 2338706, "tid": 2379422, + "ts": 6345937638441.864, "dur": 70.023, + "args": { + "External id": 979479,"Record function id": 0, "Ev Idx": 2070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.23)", "pid": 2338706, "tid": 2379422, + "ts": 6345937638517.241, "dur": 73651.995, + "args": { + "External id": 979480,"Record function id": 0, "Ev Idx": 2071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937638612.036, "dur": 7.946, + "args": { + "External id": 979481,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937638630.426, "dur": 6.683, + "args": { + "External id": 979482,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345937638656.658, "dur": 72600.416, + "args": { + "External id": 979483,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345937638671.294, "dur": 72572.004, + "args": { + "External id": 979484,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937638764.867, "dur": 18.792, + "args": { + "External id": 979485,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937638803.520, "dur": 72396.157, + "args": { + "External id": 979486,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937638806.906, "dur": 72391.619, + "args": { + "External id": 979487,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937638811.563, "dur": 10.963, + "args": { + "External id": 979488,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937638824.451, "dur": 72368.676, + "args": { + "External id": 979489,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937711368.248, "dur": 12.554, + "args": { + "External id": 979490,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937711371.927, "dur": 8.560, + "args": { + "External id": 979491,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345937711410.303, "dur": 382.800, + "args": { + "External id": 979492,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937711444.360, "dur": 343.854, + "args": { + "External id": 979493,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2084, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345937711457.769, "dur": 324.652, + "args": { + "External id": 979494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937711817.037, "dur": 2.240, + "args": { + "External id": 979495,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2086, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937711877.031, "dur": 6.479, + "args": { + "External id": 979496,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937711931.173, "dur": 1.545, + "args": { + "External id": 979497,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937711952.374, "dur": 3.640, + "args": { + "External id": 979498,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937711969.095, "dur": 1.028, + "args": { + "External id": 979499,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937711984.413, "dur": 0.883, + "args": { + "External id": 979500,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937711998.745, "dur": 1.000, + "args": { + "External id": 979501,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712033.171, "dur": 3.494, + "args": { + "External id": 979502,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712051.269, "dur": 32.480, + "args": { + "External id": 979503,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712102.833, "dur": 1.196, + "args": { + "External id": 979504,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937712203.308, "dur": 2987.058, + "args": { + "External id": 979505,"Record function id": 0, "Ev Idx": 2096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.22)", "pid": 2338706, "tid": 2379422, + "ts": 6345937712224.547, "dur": 1125.522, + "args": { + "External id": 979506,"Record function id": 0, "Ev Idx": 2097 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338706, "tid": 2379422, + "ts": 6345937712238.952, "dur": 341.826, + "args": { + "External id": 979507,"Record function id": 0, "Ev Idx": 2098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937712332.413, "dur": 4.638, + "args": { + "External id": 979508,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937712340.269, "dur": 0.850, + "args": { + "External id": 979509,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937712342.871, "dur": 2.829, + "args": { + "External id": 979510,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937712347.930, "dur": 1.022, + "args": { + "External id": 979511,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937712350.679, "dur": 1.026, + "args": { + "External id": 979512,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937712353.076, "dur": 0.837, + "args": { + "External id": 979513,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937712355.383, "dur": 2.394, + "args": { + "External id": 979514,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937712361.473, "dur": 1.016, + "args": { + "External id": 979515,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937712364.184, "dur": 0.955, + "args": { + "External id": 979516,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937712366.482, "dur": 0.718, + "args": { + "External id": 979517,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937712385.270, "dur": 163.462, + "args": { + "External id": 979518,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937712402.522, "dur": 141.301, + "args": { + "External id": 979519,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937712427.800, "dur": 17.669, + "args": { + "External id": 979520,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937712449.242, "dur": 67.882, + "args": { + "External id": 979521,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937712453.913, "dur": 62.862, + "args": { + "External id": 979522,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712457.889, "dur": 6.503, + "args": { + "External id": 979523,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937712466.203, "dur": 49.973, + "args": { + "External id": 979524,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2115 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.21", "pid": 2338706, "tid": 2379422, + "ts": 6345937712674.890, "dur": 667.088, + "args": { + "External id": 979525,"Record function id": 0, "Ev Idx": 2116 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338706, "tid": 2379422, + "ts": 6345937712693.436, "dur": 634.384, + "args": { + "External id": 979526,"Record function id": 0, "Ev Idx": 2117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937712751.720, "dur": 4.396, + "args": { + "External id": 979527,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345937712772.014, "dur": 36.038, + "args": { + "External id": 979528,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712777.188, "dur": 1.453, + "args": { + "External id": 979529,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712782.276, "dur": 0.308, + "args": { + "External id": 979530,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712784.194, "dur": 0.435, + "args": { + "External id": 979531,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712786.292, "dur": 0.582, + "args": { + "External id": 979532,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712789.281, "dur": 0.447, + "args": { + "External id": 979533,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712791.172, "dur": 2.637, + "args": { + "External id": 979534,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712795.672, "dur": 2.020, + "args": { + "External id": 979535,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712799.146, "dur": 0.341, + "args": { + "External id": 979536,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712800.900, "dur": 0.452, + "args": { + "External id": 979537,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937712818.978, "dur": 45.414, + "args": { + "External id": 979538,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345937712895.717, "dur": 140.279, + "args": { + "External id": 979539,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937712905.349, "dur": 3.626, + "args": { + "External id": 979540,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345937712914.252, "dur": 10.623, + "args": { + "External id": 979541,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937712918.406, "dur": 6.031, + "args": { + "External id": 979542,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712922.303, "dur": 0.743, + "args": { + "External id": 979543,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345937712931.514, "dur": 34.080, + "args": { + "External id": 979544,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712933.961, "dur": 2.400, + "args": { + "External id": 979545,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712938.233, "dur": 0.343, + "args": { + "External id": 979546,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712940.113, "dur": 2.441, + "args": { + "External id": 979547,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712945.539, "dur": 0.256, + "args": { + "External id": 979548,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712947.259, "dur": 0.375, + "args": { + "External id": 979549,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712949.224, "dur": 0.471, + "args": { + "External id": 979550,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712953.172, "dur": 0.394, + "args": { + "External id": 979551,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712954.928, "dur": 0.313, + "args": { + "External id": 979552,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937712956.723, "dur": 2.370, + "args": { + "External id": 979553,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937712978.061, "dur": 48.541, + "args": { + "External id": 979554,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345937713120.725, "dur": 132.087, + "args": { + "External id": 979555,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937713153.614, "dur": 95.826, + "args": { + "External id": 979556,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2147, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345937713165.423, "dur": 79.182, + "args": { + "External id": 979557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937713271.110, "dur": 1.859, + "args": { + "External id": 979558,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2149, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937713357.685, "dur": 1808.167, + "args": { + "External id": 979559,"Sequence number": 10552288, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2150 + } + }, + { + "ph": "f", "id": 178, "pid": 2338706, "tid": 2379422, "ts": 6345937713357.685, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937713473.604, "dur": 104.206, + "args": { + "External id": 979560,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937713620.785, "dur": 38.716, + "args": { + "External id": 979561,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345937713677.308, "dur": 49.184, + "args": { + "External id": 979562,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937713749.235, "dur": 31.502, + "args": { + "External id": 979563,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937713787.484, "dur": 31.202, + "args": { + "External id": 979564,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937713825.348, "dur": 26.648, + "args": { + "External id": 979565,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937713861.003, "dur": 28.260, + "args": { + "External id": 979566,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937713919.451, "dur": 21.859, + "args": { + "External id": 979567,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937713965.098, "dur": 27.674, + "args": { + "External id": 979568,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937714036.257, "dur": 58.995, + "args": { + "External id": 979569,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937714115.004, "dur": 17.710, + "args": { + "External id": 979570,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937714142.694, "dur": 43.949, + "args": { + "External id": 979571,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937714190.674, "dur": 32.828, + "args": { + "External id": 979572,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937714256.757, "dur": 263.049, + "args": { + "External id": 979573,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937714338.283, "dur": 6.629, + "args": { + "External id": 979574,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937714347.028, "dur": 4.051, + "args": { + "External id": 979575,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937714352.692, "dur": 3.657, + "args": { + "External id": 979576,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937714357.827, "dur": 2.047, + "args": { + "External id": 979577,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937714409.564, "dur": 7.909, + "args": { + "External id": 979578,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937714414.397, "dur": 2.877, + "args": { + "External id": 979579,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937714419.413, "dur": 35.362, + "args": { + "External id": 979580,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937714425.552, "dur": 3.767, + "args": { + "External id": 979581,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937714456.343, "dur": 2.106, + "args": { + "External id": 979582,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937714457.619, "dur": 0.742, + "args": { + "External id": 979583,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937714459.825, "dur": 15.412, + "args": { + "External id": 979584,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937714462.060, "dur": 0.704, + "args": { + "External id": 979585,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937714559.145, "dur": 26.009, + "args": { + "External id": 979586,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937714604.212, "dur": 15.802, + "args": { + "External id": 979587,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937714627.229, "dur": 36.891, + "args": { + "External id": 979588,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937714671.121, "dur": 36.352, + "args": { + "External id": 979589,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937714717.688, "dur": 19.999, + "args": { + "External id": 979590,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937714743.762, "dur": 30.212, + "args": { + "External id": 979591,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937714780.933, "dur": 26.218, + "args": { + "External id": 979592,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937714835.209, "dur": 39.777, + "args": { + "External id": 979593,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345937714898.574, "dur": 23.197, + "args": { + "External id": 979594,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937714939.783, "dur": 23.311, + "args": { + "External id": 979595,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937714980.220, "dur": 15.895, + "args": { + "External id": 979596,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937715040.189, "dur": 53.177, + "args": { + "External id": 979597,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345937715114.970, "dur": 17.583, + "args": { + "External id": 979598,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715214.030, "dur": 15.673, + "args": { + "External id": 979599,"Record function id": 0, "Ev Idx": 2190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715217.718, "dur": 10.922, + "args": { + "External id": 979600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715222.308, "dur": 5.419, + "args": { + "External id": 979601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715223.770, "dur": 3.862, + "args": { + "External id": 979602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715233.655, "dur": 5.098, + "args": { + "External id": 979603,"Record function id": 0, "Ev Idx": 2194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715235.170, "dur": 3.096, + "args": { + "External id": 979604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715236.098, "dur": 1.579, + "args": { + "External id": 979605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715236.683, "dur": 0.867, + "args": { + "External id": 979606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715242.248, "dur": 6.589, + "args": { + "External id": 979607,"Record function id": 0, "Ev Idx": 2198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715243.522, "dur": 4.864, + "args": { + "External id": 979608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715244.286, "dur": 3.697, + "args": { + "External id": 979609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715244.891, "dur": 2.989, + "args": { + "External id": 979610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715252.168, "dur": 4.640, + "args": { + "External id": 979611,"Record function id": 0, "Ev Idx": 2202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715253.622, "dur": 2.739, + "args": { + "External id": 979612,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715254.489, "dur": 1.480, + "args": { + "External id": 979613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715255.021, "dur": 0.841, + "args": { + "External id": 979614,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715259.915, "dur": 4.309, + "args": { + "External id": 979615,"Record function id": 0, "Ev Idx": 2206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715261.282, "dur": 2.522, + "args": { + "External id": 979616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715262.263, "dur": 1.146, + "args": { + "External id": 979617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715262.676, "dur": 0.666, + "args": { + "External id": 979618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715267.339, "dur": 4.156, + "args": { + "External id": 979619,"Record function id": 0, "Ev Idx": 2210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715268.475, "dur": 2.581, + "args": { + "External id": 979620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715269.028, "dur": 1.497, + "args": { + "External id": 979621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715269.528, "dur": 0.920, + "args": { + "External id": 979622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715274.698, "dur": 5.155, + "args": { + "External id": 979623,"Record function id": 0, "Ev Idx": 2214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715276.508, "dur": 2.900, + "args": { + "External id": 979624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715277.215, "dur": 1.800, + "args": { + "External id": 979625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715277.908, "dur": 1.034, + "args": { + "External id": 979626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715282.931, "dur": 6.549, + "args": { + "External id": 979627,"Record function id": 0, "Ev Idx": 2218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715284.266, "dur": 4.796, + "args": { + "External id": 979628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715284.733, "dur": 3.893, + "args": { + "External id": 979629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715287.937, "dur": 0.622, + "args": { + "External id": 979630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715294.020, "dur": 4.633, + "args": { + "External id": 979631,"Record function id": 0, "Ev Idx": 2222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937715295.398, "dur": 2.784, + "args": { + "External id": 979632,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715295.991, "dur": 1.571, + "args": { + "External id": 979633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937715296.516, "dur": 0.973, + "args": { + "External id": 979634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937715302.852, "dur": 71357.963, + "args": { + "External id": 979635,"Record function id": 0, "Sequence number": 10552287, "Fwd thread id": 1, "Ev Idx": 2226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937715304.309, "dur": 71347.299, + "args": { + "External id": 979636,"Sequence number": 10552287, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2227 + } + }, + { + "ph": "f", "id": 179, "pid": 2338706, "tid": 2379422, "ts": 6345937715304.309, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.22)", "pid": 2338706, "tid": 2379422, + "ts": 6345937715333.468, "dur": 41.383, + "args": { + "External id": 979637,"Record function id": 0, "Ev Idx": 2228 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.22)", "pid": 2338706, "tid": 2379422, + "ts": 6345937715382.877, "dur": 68.984, + "args": { + "External id": 979638,"Record function id": 0, "Ev Idx": 2229 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.22)", "pid": 2338706, "tid": 2379422, + "ts": 6345937715457.674, "dur": 71185.587, + "args": { + "External id": 979639,"Record function id": 0, "Ev Idx": 2230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937715550.277, "dur": 8.000, + "args": { + "External id": 979640,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937715568.086, "dur": 6.854, + "args": { + "External id": 979641,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345937715606.638, "dur": 70068.269, + "args": { + "External id": 979642,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345937715623.258, "dur": 70036.939, + "args": { + "External id": 979643,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937715716.770, "dur": 18.310, + "args": { + "External id": 979644,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937715754.625, "dur": 69859.231, + "args": { + "External id": 979645,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937715757.815, "dur": 69854.855, + "args": { + "External id": 979646,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937715762.161, "dur": 12.026, + "args": { + "External id": 979647,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937715776.123, "dur": 69831.190, + "args": { + "External id": 979648,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937785785.834, "dur": 12.527, + "args": { + "External id": 979649,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937785789.508, "dur": 8.500, + "args": { + "External id": 979650,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345937785830.648, "dur": 477.093, + "args": { + "External id": 979651,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937785865.389, "dur": 436.244, + "args": { + "External id": 979652,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2243, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345937785878.968, "dur": 416.106, + "args": { + "External id": 979653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937786337.425, "dur": 2.545, + "args": { + "External id": 979654,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2245, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937786410.227, "dur": 7.080, + "args": { + "External id": 979655,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937786467.198, "dur": 1.472, + "args": { + "External id": 979656,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937786486.746, "dur": 3.733, + "args": { + "External id": 979657,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937786504.273, "dur": 1.120, + "args": { + "External id": 979658,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937786519.073, "dur": 1.029, + "args": { + "External id": 979659,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937786532.753, "dur": 0.835, + "args": { + "External id": 979660,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937786545.479, "dur": 3.110, + "args": { + "External id": 979661,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937786560.590, "dur": 3.707, + "args": { + "External id": 979662,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937786579.019, "dur": 1.087, + "args": { + "External id": 979663,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937786676.740, "dur": 3011.449, + "args": { + "External id": 979664,"Record function id": 0, "Ev Idx": 2255 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.21)", "pid": 2338706, "tid": 2379422, + "ts": 6345937786699.010, "dur": 1133.387, + "args": { + "External id": 979665,"Record function id": 0, "Ev Idx": 2256 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338706, "tid": 2379422, + "ts": 6345937786713.862, "dur": 402.584, + "args": { + "External id": 979666,"Record function id": 0, "Ev Idx": 2257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937786805.146, "dur": 4.177, + "args": { + "External id": 979667,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937786812.659, "dur": 1.124, + "args": { + "External id": 979668,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937786815.623, "dur": 2.962, + "args": { + "External id": 979669,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937786820.428, "dur": 1.048, + "args": { + "External id": 979670,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937786822.925, "dur": 0.993, + "args": { + "External id": 979671,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937786825.468, "dur": 0.809, + "args": { + "External id": 979672,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937786827.957, "dur": 2.836, + "args": { + "External id": 979673,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937786834.360, "dur": 0.841, + "args": { + "External id": 979674,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937786836.814, "dur": 0.814, + "args": { + "External id": 979675,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937786839.470, "dur": 0.981, + "args": { + "External id": 979676,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937786857.997, "dur": 185.453, + "args": { + "External id": 979677,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937786874.656, "dur": 162.811, + "args": { + "External id": 979678,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937786899.552, "dur": 17.889, + "args": { + "External id": 979679,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937786921.312, "dur": 67.086, + "args": { + "External id": 979680,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937786924.507, "dur": 63.466, + "args": { + "External id": 979681,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937786930.206, "dur": 6.431, + "args": { + "External id": 979682,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937786938.332, "dur": 48.890, + "args": { + "External id": 979683,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2274 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.20", "pid": 2338706, "tid": 2379422, + "ts": 6345937787219.704, "dur": 604.392, + "args": { + "External id": 979684,"Record function id": 0, "Ev Idx": 2275 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338706, "tid": 2379422, + "ts": 6345937787238.170, "dur": 572.875, + "args": { + "External id": 979685,"Record function id": 0, "Ev Idx": 2276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937787300.703, "dur": 6.350, + "args": { + "External id": 979686,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345937787323.024, "dur": 38.806, + "args": { + "External id": 979687,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787328.862, "dur": 1.918, + "args": { + "External id": 979688,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787333.135, "dur": 2.379, + "args": { + "External id": 979689,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787336.909, "dur": 0.673, + "args": { + "External id": 979690,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787338.846, "dur": 0.427, + "args": { + "External id": 979691,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787342.757, "dur": 0.497, + "args": { + "External id": 979692,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787344.537, "dur": 2.310, + "args": { + "External id": 979693,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787348.225, "dur": 0.497, + "args": { + "External id": 979694,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787351.866, "dur": 0.290, + "args": { + "External id": 979695,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787353.853, "dur": 0.309, + "args": { + "External id": 979696,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937787372.897, "dur": 48.805, + "args": { + "External id": 979697,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345937787454.355, "dur": 121.240, + "args": { + "External id": 979698,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937787464.785, "dur": 3.641, + "args": { + "External id": 979699,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345937787474.091, "dur": 11.178, + "args": { + "External id": 979700,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937787478.403, "dur": 6.448, + "args": { + "External id": 979701,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787482.700, "dur": 0.583, + "args": { + "External id": 979702,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345937787491.500, "dur": 33.203, + "args": { + "External id": 979703,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787493.742, "dur": 0.493, + "args": { + "External id": 979704,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787497.878, "dur": 0.435, + "args": { + "External id": 979705,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787499.607, "dur": 3.047, + "args": { + "External id": 979706,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787503.982, "dur": 2.614, + "args": { + "External id": 979707,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787508.157, "dur": 0.517, + "args": { + "External id": 979708,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787510.161, "dur": 0.605, + "args": { + "External id": 979709,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787514.549, "dur": 0.351, + "args": { + "External id": 979710,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787516.362, "dur": 0.374, + "args": { + "External id": 979711,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937787518.004, "dur": 0.253, + "args": { + "External id": 979712,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937787535.162, "dur": 32.608, + "args": { + "External id": 979713,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345937787620.061, "dur": 122.865, + "args": { + "External id": 979714,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937787651.197, "dur": 88.556, + "args": { + "External id": 979715,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2306, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345937787662.237, "dur": 73.325, + "args": { + "External id": 979716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937787760.952, "dur": 1.544, + "args": { + "External id": 979717,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2308, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937787839.770, "dur": 1825.477, + "args": { + "External id": 979718,"Sequence number": 10552286, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2309 + } + }, + { + "ph": "f", "id": 180, "pid": 2338706, "tid": 2379422, "ts": 6345937787839.770, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937787953.303, "dur": 163.480, + "args": { + "External id": 979719,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937788168.247, "dur": 42.389, + "args": { + "External id": 979720,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345937788229.005, "dur": 57.368, + "args": { + "External id": 979721,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937788298.234, "dur": 32.061, + "args": { + "External id": 979722,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937788336.559, "dur": 31.367, + "args": { + "External id": 979723,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937788374.225, "dur": 27.989, + "args": { + "External id": 979724,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937788409.380, "dur": 27.852, + "args": { + "External id": 979725,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937788466.211, "dur": 22.729, + "args": { + "External id": 979726,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937788507.938, "dur": 27.464, + "args": { + "External id": 979727,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937788561.544, "dur": 18.285, + "args": { + "External id": 979728,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937788593.626, "dur": 18.823, + "args": { + "External id": 979729,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937788620.253, "dur": 35.401, + "args": { + "External id": 979730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937788659.059, "dur": 31.443, + "args": { + "External id": 979731,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937788723.062, "dur": 265.782, + "args": { + "External id": 979732,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937788809.219, "dur": 6.336, + "args": { + "External id": 979733,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937788817.535, "dur": 2.276, + "args": { + "External id": 979734,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937788820.982, "dur": 2.886, + "args": { + "External id": 979735,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937788825.260, "dur": 1.940, + "args": { + "External id": 979736,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937788875.060, "dur": 7.447, + "args": { + "External id": 979737,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937788879.385, "dur": 2.910, + "args": { + "External id": 979738,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937788884.687, "dur": 36.890, + "args": { + "External id": 979739,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937788889.953, "dur": 5.425, + "args": { + "External id": 979740,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937788923.126, "dur": 1.805, + "args": { + "External id": 979741,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937788924.029, "dur": 0.816, + "args": { + "External id": 979742,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937788926.310, "dur": 16.814, + "args": { + "External id": 979743,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937788928.787, "dur": 0.535, + "args": { + "External id": 979744,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937789046.522, "dur": 65.972, + "args": { + "External id": 979745,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937789136.934, "dur": 18.586, + "args": { + "External id": 979746,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937789164.378, "dur": 48.028, + "args": { + "External id": 979747,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937789230.764, "dur": 51.180, + "args": { + "External id": 979748,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937789296.704, "dur": 20.524, + "args": { + "External id": 979749,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937789323.453, "dur": 30.909, + "args": { + "External id": 979750,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937789361.878, "dur": 27.355, + "args": { + "External id": 979751,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937789395.774, "dur": 31.697, + "args": { + "External id": 979752,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345937789451.535, "dur": 27.131, + "args": { + "External id": 979753,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937789507.435, "dur": 27.005, + "args": { + "External id": 979754,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937789553.894, "dur": 16.964, + "args": { + "External id": 979755,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937789587.915, "dur": 14.810, + "args": { + "External id": 979756,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345937789619.589, "dur": 15.647, + "args": { + "External id": 979757,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789711.057, "dur": 18.018, + "args": { + "External id": 979758,"Record function id": 0, "Ev Idx": 2349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789714.978, "dur": 13.040, + "args": { + "External id": 979759,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789719.577, "dur": 7.566, + "args": { + "External id": 979760,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789721.169, "dur": 5.867, + "args": { + "External id": 979761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789732.989, "dur": 5.371, + "args": { + "External id": 979762,"Record function id": 0, "Ev Idx": 2353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789734.697, "dur": 3.170, + "args": { + "External id": 979763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789735.583, "dur": 1.687, + "args": { + "External id": 979764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789736.315, "dur": 0.868, + "args": { + "External id": 979765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789741.607, "dur": 8.023, + "args": { + "External id": 979766,"Record function id": 0, "Ev Idx": 2357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789743.158, "dur": 5.986, + "args": { + "External id": 979767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789743.872, "dur": 4.504, + "args": { + "External id": 979768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789744.784, "dur": 3.479, + "args": { + "External id": 979769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789752.908, "dur": 4.518, + "args": { + "External id": 979770,"Record function id": 0, "Ev Idx": 2361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789754.193, "dur": 2.791, + "args": { + "External id": 979771,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789755.008, "dur": 1.557, + "args": { + "External id": 979772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789755.614, "dur": 0.865, + "args": { + "External id": 979773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789760.482, "dur": 4.252, + "args": { + "External id": 979774,"Record function id": 0, "Ev Idx": 2365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789761.823, "dur": 2.503, + "args": { + "External id": 979775,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789762.515, "dur": 1.382, + "args": { + "External id": 979776,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789762.791, "dur": 1.032, + "args": { + "External id": 979777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789767.764, "dur": 4.120, + "args": { + "External id": 979778,"Record function id": 0, "Ev Idx": 2369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789769.091, "dur": 2.372, + "args": { + "External id": 979779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789769.738, "dur": 1.171, + "args": { + "External id": 979780,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789770.210, "dur": 0.627, + "args": { + "External id": 979781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789775.071, "dur": 4.503, + "args": { + "External id": 979782,"Record function id": 0, "Ev Idx": 2373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789776.274, "dur": 2.886, + "args": { + "External id": 979783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789777.236, "dur": 1.540, + "args": { + "External id": 979784,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789777.896, "dur": 0.814, + "args": { + "External id": 979785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789782.648, "dur": 7.103, + "args": { + "External id": 979786,"Record function id": 0, "Ev Idx": 2377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789784.354, "dur": 4.989, + "args": { + "External id": 979787,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789785.035, "dur": 3.609, + "args": { + "External id": 979788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789787.899, "dur": 0.650, + "args": { + "External id": 979789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789793.918, "dur": 4.360, + "args": { + "External id": 979790,"Record function id": 0, "Ev Idx": 2381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937789795.575, "dur": 2.286, + "args": { + "External id": 979791,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789796.051, "dur": 1.396, + "args": { + "External id": 979792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937789796.408, "dur": 0.960, + "args": { + "External id": 979793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937789802.265, "dur": 67809.447, + "args": { + "External id": 979794,"Record function id": 0, "Sequence number": 10552285, "Fwd thread id": 1, "Ev Idx": 2385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937789803.804, "dur": 67797.439, + "args": { + "External id": 979795,"Sequence number": 10552285, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2386 + } + }, + { + "ph": "f", "id": 181, "pid": 2338706, "tid": 2379422, "ts": 6345937789803.804, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.21)", "pid": 2338706, "tid": 2379422, + "ts": 6345937789835.572, "dur": 40.308, + "args": { + "External id": 979796,"Record function id": 0, "Ev Idx": 2387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.21)", "pid": 2338706, "tid": 2379422, + "ts": 6345937789883.601, "dur": 64.574, + "args": { + "External id": 979797,"Record function id": 0, "Ev Idx": 2388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.21)", "pid": 2338706, "tid": 2379422, + "ts": 6345937789953.923, "dur": 67639.178, + "args": { + "External id": 979798,"Record function id": 0, "Ev Idx": 2389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937790097.716, "dur": 9.094, + "args": { + "External id": 979799,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937790119.524, "dur": 7.226, + "args": { + "External id": 979800,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345937790142.360, "dur": 66499.254, + "args": { + "External id": 979801,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345937790172.447, "dur": 66454.576, + "args": { + "External id": 979802,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937790262.294, "dur": 18.193, + "args": { + "External id": 979803,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937790299.956, "dur": 66279.712, + "args": { + "External id": 979804,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937790306.358, "dur": 66272.071, + "args": { + "External id": 979805,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937790311.301, "dur": 10.765, + "args": { + "External id": 979806,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937790323.874, "dur": 66249.620, + "args": { + "External id": 979807,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937856755.049, "dur": 13.512, + "args": { + "External id": 979808,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937856759.087, "dur": 9.017, + "args": { + "External id": 979809,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345937856802.694, "dur": 433.617, + "args": { + "External id": 979810,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937856838.571, "dur": 391.925, + "args": { + "External id": 979811,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2402, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345937856852.645, "dur": 371.631, + "args": { + "External id": 979812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937857286.814, "dur": 2.628, + "args": { + "External id": 979813,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2404, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937857358.986, "dur": 7.730, + "args": { + "External id": 979814,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937857416.149, "dur": 1.940, + "args": { + "External id": 979815,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937857436.642, "dur": 4.070, + "args": { + "External id": 979816,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937857454.116, "dur": 1.200, + "args": { + "External id": 979817,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937857469.187, "dur": 1.094, + "args": { + "External id": 979818,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937857482.525, "dur": 1.025, + "args": { + "External id": 979819,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937857495.420, "dur": 3.343, + "args": { + "External id": 979820,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937857511.338, "dur": 3.343, + "args": { + "External id": 979821,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937857528.267, "dur": 1.109, + "args": { + "External id": 979822,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937857627.447, "dur": 3132.372, + "args": { + "External id": 979823,"Record function id": 0, "Ev Idx": 2414 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.20)", "pid": 2338706, "tid": 2379422, + "ts": 6345937857649.670, "dur": 1161.846, + "args": { + "External id": 979824,"Record function id": 0, "Ev Idx": 2415 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338706, "tid": 2379422, + "ts": 6345937857665.321, "dur": 376.060, + "args": { + "External id": 979825,"Record function id": 0, "Ev Idx": 2416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937857760.027, "dur": 4.250, + "args": { + "External id": 979826,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937857767.627, "dur": 1.096, + "args": { + "External id": 979827,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937857770.935, "dur": 3.807, + "args": { + "External id": 979828,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937857776.950, "dur": 0.817, + "args": { + "External id": 979829,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937857779.330, "dur": 1.104, + "args": { + "External id": 979830,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937857781.952, "dur": 1.152, + "args": { + "External id": 979831,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937857784.546, "dur": 2.977, + "args": { + "External id": 979832,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937857791.268, "dur": 0.939, + "args": { + "External id": 979833,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937857793.937, "dur": 1.174, + "args": { + "External id": 979834,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937857796.619, "dur": 1.248, + "args": { + "External id": 979835,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937857816.801, "dur": 168.676, + "args": { + "External id": 979836,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937857833.798, "dur": 143.655, + "args": { + "External id": 979837,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937857856.014, "dur": 18.852, + "args": { + "External id": 979838,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937857878.522, "dur": 69.222, + "args": { + "External id": 979839,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937857881.477, "dur": 65.907, + "args": { + "External id": 979840,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937857885.843, "dur": 6.091, + "args": { + "External id": 979841,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937857893.770, "dur": 53.059, + "args": { + "External id": 979842,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2433 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 2338706, "tid": 2379422, + "ts": 6345937858182.791, "dur": 620.410, + "args": { + "External id": 979843,"Record function id": 0, "Ev Idx": 2434 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338706, "tid": 2379422, + "ts": 6345937858202.995, "dur": 586.805, + "args": { + "External id": 979844,"Record function id": 0, "Ev Idx": 2435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937858271.160, "dur": 6.258, + "args": { + "External id": 979845,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345937858293.817, "dur": 39.819, + "args": { + "External id": 979846,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858299.703, "dur": 3.613, + "args": { + "External id": 979847,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858305.531, "dur": 0.530, + "args": { + "External id": 979848,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858307.697, "dur": 0.704, + "args": { + "External id": 979849,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858311.696, "dur": 0.641, + "args": { + "External id": 979850,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858314.001, "dur": 0.650, + "args": { + "External id": 979851,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858316.078, "dur": 2.616, + "args": { + "External id": 979852,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858321.648, "dur": 0.354, + "args": { + "External id": 979853,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858323.287, "dur": 0.372, + "args": { + "External id": 979854,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858325.147, "dur": 1.419, + "args": { + "External id": 979855,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937858345.843, "dur": 47.429, + "args": { + "External id": 979856,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345937858426.207, "dur": 120.788, + "args": { + "External id": 979857,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937858436.808, "dur": 3.393, + "args": { + "External id": 979858,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345937858445.580, "dur": 11.026, + "args": { + "External id": 979859,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937858450.232, "dur": 5.905, + "args": { + "External id": 979860,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858454.269, "dur": 0.583, + "args": { + "External id": 979861,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345937858463.579, "dur": 31.122, + "args": { + "External id": 979862,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858465.945, "dur": 0.540, + "args": { + "External id": 979863,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858468.286, "dur": 0.701, + "args": { + "External id": 979864,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858470.290, "dur": 3.059, + "args": { + "External id": 979865,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858474.852, "dur": 0.576, + "args": { + "External id": 979866,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858477.097, "dur": 0.524, + "args": { + "External id": 979867,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858480.327, "dur": 0.283, + "args": { + "External id": 979868,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858481.966, "dur": 0.401, + "args": { + "External id": 979869,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858483.733, "dur": 0.453, + "args": { + "External id": 979870,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937858487.323, "dur": 0.607, + "args": { + "External id": 979871,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937858507.393, "dur": 31.063, + "args": { + "External id": 979872,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345937858593.464, "dur": 124.115, + "args": { + "External id": 979873,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937858624.777, "dur": 89.297, + "args": { + "External id": 979874,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2465, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345937858636.761, "dur": 72.829, + "args": { + "External id": 979875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937858735.987, "dur": 1.985, + "args": { + "External id": 979876,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2467, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937858819.439, "dur": 1914.716, + "args": { + "External id": 979877,"Sequence number": 10552284, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2468 + } + }, + { + "ph": "f", "id": 182, "pid": 2338706, "tid": 2379422, "ts": 6345937858819.439, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937858935.774, "dur": 167.508, + "args": { + "External id": 979878,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937859154.210, "dur": 43.831, + "args": { + "External id": 979879,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345937859218.108, "dur": 58.534, + "args": { + "External id": 979880,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937859289.263, "dur": 31.903, + "args": { + "External id": 979881,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937859327.268, "dur": 33.874, + "args": { + "External id": 979882,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937859367.399, "dur": 27.066, + "args": { + "External id": 979883,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937859402.465, "dur": 29.906, + "args": { + "External id": 979884,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937859459.255, "dur": 25.958, + "args": { + "External id": 979885,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937859504.919, "dur": 30.360, + "args": { + "External id": 979886,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937859561.044, "dur": 21.392, + "args": { + "External id": 979887,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937859599.701, "dur": 17.438, + "args": { + "External id": 979888,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937859625.435, "dur": 35.296, + "args": { + "External id": 979889,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937859664.224, "dur": 32.739, + "args": { + "External id": 979890,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937859729.840, "dur": 264.108, + "args": { + "External id": 979891,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937859817.564, "dur": 6.652, + "args": { + "External id": 979892,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937859826.490, "dur": 3.053, + "args": { + "External id": 979893,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937859830.809, "dur": 2.788, + "args": { + "External id": 979894,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937859834.901, "dur": 1.849, + "args": { + "External id": 979895,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937859883.315, "dur": 9.266, + "args": { + "External id": 979896,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937859889.387, "dur": 3.056, + "args": { + "External id": 979897,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937859894.757, "dur": 33.600, + "args": { + "External id": 979898,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937859900.782, "dur": 3.728, + "args": { + "External id": 979899,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937859930.068, "dur": 1.978, + "args": { + "External id": 979900,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937859931.328, "dur": 0.631, + "args": { + "External id": 979901,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937859933.488, "dur": 14.381, + "args": { + "External id": 979902,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937859935.703, "dur": 0.561, + "args": { + "External id": 979903,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937860093.142, "dur": 31.882, + "args": { + "External id": 979904,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937860147.718, "dur": 17.128, + "args": { + "External id": 979905,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937860173.844, "dur": 49.578, + "args": { + "External id": 979906,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937860230.170, "dur": 53.774, + "args": { + "External id": 979907,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937860302.767, "dur": 24.032, + "args": { + "External id": 979908,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937860333.221, "dur": 32.739, + "args": { + "External id": 979909,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937860376.598, "dur": 27.274, + "args": { + "External id": 979910,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937860410.594, "dur": 34.208, + "args": { + "External id": 979911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345937860469.716, "dur": 25.762, + "args": { + "External id": 979912,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937860566.602, "dur": 28.943, + "args": { + "External id": 979913,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937860615.728, "dur": 19.390, + "args": { + "External id": 979914,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937860652.333, "dur": 16.227, + "args": { + "External id": 979915,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345937860684.741, "dur": 16.625, + "args": { + "External id": 979916,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860783.542, "dur": 15.991, + "args": { + "External id": 979917,"Record function id": 0, "Ev Idx": 2508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860786.960, "dur": 11.556, + "args": { + "External id": 979918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860791.628, "dur": 6.050, + "args": { + "External id": 979919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860793.245, "dur": 4.342, + "args": { + "External id": 979920,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860803.384, "dur": 6.168, + "args": { + "External id": 979921,"Record function id": 0, "Ev Idx": 2512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860805.173, "dur": 3.913, + "args": { + "External id": 979922,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860806.335, "dur": 2.091, + "args": { + "External id": 979923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860807.340, "dur": 0.971, + "args": { + "External id": 979924,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860812.771, "dur": 7.895, + "args": { + "External id": 979925,"Record function id": 0, "Ev Idx": 2516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860814.058, "dur": 6.110, + "args": { + "External id": 979926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860815.129, "dur": 4.426, + "args": { + "External id": 979927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860816.032, "dur": 3.415, + "args": { + "External id": 979928,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860823.877, "dur": 4.963, + "args": { + "External id": 979929,"Record function id": 0, "Ev Idx": 2520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860825.501, "dur": 2.924, + "args": { + "External id": 979930,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860826.219, "dur": 1.524, + "args": { + "External id": 979931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860826.745, "dur": 0.925, + "args": { + "External id": 979932,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860832.160, "dur": 3.914, + "args": { + "External id": 979933,"Record function id": 0, "Ev Idx": 2524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860833.283, "dur": 2.368, + "args": { + "External id": 979934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860833.920, "dur": 1.333, + "args": { + "External id": 979935,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860834.528, "dur": 0.652, + "args": { + "External id": 979936,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860839.373, "dur": 3.973, + "args": { + "External id": 979937,"Record function id": 0, "Ev Idx": 2528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860840.798, "dur": 2.141, + "args": { + "External id": 979938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860841.390, "dur": 1.155, + "args": { + "External id": 979939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860841.816, "dur": 0.654, + "args": { + "External id": 979940,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860846.598, "dur": 4.228, + "args": { + "External id": 979941,"Record function id": 0, "Ev Idx": 2532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860847.874, "dur": 2.546, + "args": { + "External id": 979942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860848.752, "dur": 1.110, + "args": { + "External id": 979943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860849.137, "dur": 0.660, + "args": { + "External id": 979944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860854.100, "dur": 6.513, + "args": { + "External id": 979945,"Record function id": 0, "Ev Idx": 2536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860855.173, "dur": 5.024, + "args": { + "External id": 979946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860855.950, "dur": 3.703, + "args": { + "External id": 979947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860858.770, "dur": 0.810, + "args": { + "External id": 979948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860864.037, "dur": 4.703, + "args": { + "External id": 979949,"Record function id": 0, "Ev Idx": 2540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937860865.308, "dur": 3.005, + "args": { + "External id": 979950,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860866.331, "dur": 1.560, + "args": { + "External id": 979951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937860867.027, "dur": 0.786, + "args": { + "External id": 979952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937860873.368, "dur": 63700.947, + "args": { + "External id": 979953,"Record function id": 0, "Sequence number": 10552283, "Fwd thread id": 1, "Ev Idx": 2544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937860875.154, "dur": 63689.814, + "args": { + "External id": 979954,"Sequence number": 10552283, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2545 + } + }, + { + "ph": "f", "id": 183, "pid": 2338706, "tid": 2379422, "ts": 6345937860875.154, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.20)", "pid": 2338706, "tid": 2379422, + "ts": 6345937860907.885, "dur": 40.968, + "args": { + "External id": 979955,"Record function id": 0, "Ev Idx": 2546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.20)", "pid": 2338706, "tid": 2379422, + "ts": 6345937860956.699, "dur": 95.247, + "args": { + "External id": 979956,"Record function id": 0, "Ev Idx": 2547 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.20)", "pid": 2338706, "tid": 2379422, + "ts": 6345937861094.788, "dur": 63461.796, + "args": { + "External id": 979957,"Record function id": 0, "Ev Idx": 2548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937861192.584, "dur": 8.963, + "args": { + "External id": 979958,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937861212.641, "dur": 7.182, + "args": { + "External id": 979959,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345937861235.209, "dur": 62246.417, + "args": { + "External id": 979960,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345937861251.539, "dur": 62216.078, + "args": { + "External id": 979961,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937861341.362, "dur": 18.701, + "args": { + "External id": 979962,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937861382.181, "dur": 62039.592, + "args": { + "External id": 979963,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937861385.392, "dur": 62035.296, + "args": { + "External id": 979964,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937861391.466, "dur": 10.073, + "args": { + "External id": 979965,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937861420.787, "dur": 61995.184, + "args": { + "External id": 979966,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937923588.551, "dur": 12.968, + "args": { + "External id": 979967,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937923592.660, "dur": 8.407, + "args": { + "External id": 979968,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345937923732.792, "dur": 490.308, + "args": { + "External id": 979969,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937923769.252, "dur": 447.971, + "args": { + "External id": 979970,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2561, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345937923783.212, "dur": 427.470, + "args": { + "External id": 979971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937924250.412, "dur": 2.528, + "args": { + "External id": 979972,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2563, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937924326.779, "dur": 7.766, + "args": { + "External id": 979973,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937924384.172, "dur": 1.584, + "args": { + "External id": 979974,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937924403.758, "dur": 4.409, + "args": { + "External id": 979975,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937924421.643, "dur": 0.969, + "args": { + "External id": 979976,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937924436.693, "dur": 0.974, + "args": { + "External id": 979977,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937924449.296, "dur": 0.870, + "args": { + "External id": 979978,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937924461.931, "dur": 3.501, + "args": { + "External id": 979979,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937924477.343, "dur": 3.202, + "args": { + "External id": 979980,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937924493.023, "dur": 1.820, + "args": { + "External id": 979981,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937924588.763, "dur": 3045.080, + "args": { + "External id": 979982,"Record function id": 0, "Ev Idx": 2573 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 2338706, "tid": 2379422, + "ts": 6345937924610.708, "dur": 1122.920, + "args": { + "External id": 979983,"Record function id": 0, "Ev Idx": 2574 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338706, "tid": 2379422, + "ts": 6345937924625.759, "dur": 335.916, + "args": { + "External id": 979984,"Record function id": 0, "Ev Idx": 2575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937924709.103, "dur": 4.365, + "args": { + "External id": 979985,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937924716.630, "dur": 0.885, + "args": { + "External id": 979986,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937924719.651, "dur": 3.402, + "args": { + "External id": 979987,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937924724.781, "dur": 1.316, + "args": { + "External id": 979988,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937924727.672, "dur": 0.871, + "args": { + "External id": 979989,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937924729.883, "dur": 0.903, + "args": { + "External id": 979990,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937924732.420, "dur": 2.745, + "args": { + "External id": 979991,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937924738.746, "dur": 1.062, + "args": { + "External id": 979992,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937924741.295, "dur": 0.840, + "args": { + "External id": 979993,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937924743.729, "dur": 0.939, + "args": { + "External id": 979994,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937924762.089, "dur": 167.579, + "args": { + "External id": 979995,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937924779.162, "dur": 145.887, + "args": { + "External id": 979996,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937924798.653, "dur": 17.912, + "args": { + "External id": 979997,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937924822.618, "dur": 71.466, + "args": { + "External id": 979998,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937924825.638, "dur": 68.091, + "args": { + "External id": 979999,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937924830.008, "dur": 6.040, + "args": { + "External id": 980000,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937924837.702, "dur": 55.545, + "args": { + "External id": 980001,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2592 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 2338706, "tid": 2379422, + "ts": 6345937925118.183, "dur": 607.764, + "args": { + "External id": 980002,"Record function id": 0, "Ev Idx": 2593 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338706, "tid": 2379422, + "ts": 6345937925137.761, "dur": 575.620, + "args": { + "External id": 980003,"Record function id": 0, "Ev Idx": 2594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937925204.196, "dur": 6.484, + "args": { + "External id": 980004,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345937925227.255, "dur": 37.815, + "args": { + "External id": 980005,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925232.542, "dur": 1.932, + "args": { + "External id": 980006,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925236.712, "dur": 1.256, + "args": { + "External id": 980007,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925239.518, "dur": 0.400, + "args": { + "External id": 980008,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925241.601, "dur": 0.555, + "args": { + "External id": 980009,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925244.941, "dur": 0.557, + "args": { + "External id": 980010,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925247.043, "dur": 2.689, + "args": { + "External id": 980011,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925251.530, "dur": 0.471, + "args": { + "External id": 980012,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925254.559, "dur": 0.317, + "args": { + "External id": 980013,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925256.479, "dur": 0.580, + "args": { + "External id": 980014,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937925276.861, "dur": 48.502, + "args": { + "External id": 980015,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345937925357.603, "dur": 117.298, + "args": { + "External id": 980016,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937925368.134, "dur": 4.381, + "args": { + "External id": 980017,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345937925377.890, "dur": 11.048, + "args": { + "External id": 980018,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937925382.525, "dur": 5.985, + "args": { + "External id": 980019,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925386.409, "dur": 0.753, + "args": { + "External id": 980020,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345937925395.612, "dur": 29.953, + "args": { + "External id": 980021,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925397.792, "dur": 0.527, + "args": { + "External id": 980022,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925400.777, "dur": 0.319, + "args": { + "External id": 980023,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925403.057, "dur": 2.428, + "args": { + "External id": 980024,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925406.823, "dur": 0.634, + "args": { + "External id": 980025,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925408.820, "dur": 0.646, + "args": { + "External id": 980026,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925410.959, "dur": 0.642, + "args": { + "External id": 980027,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925413.568, "dur": 0.602, + "args": { + "External id": 980028,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925416.207, "dur": 0.416, + "args": { + "External id": 980029,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937925418.438, "dur": 0.600, + "args": { + "External id": 980030,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937925435.171, "dur": 32.327, + "args": { + "External id": 980031,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345937925520.940, "dur": 121.391, + "args": { + "External id": 980032,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937925549.875, "dur": 88.983, + "args": { + "External id": 980033,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2624, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345937925559.827, "dur": 74.570, + "args": { + "External id": 980034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937925661.434, "dur": 2.043, + "args": { + "External id": 980035,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2626, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937925741.741, "dur": 1866.052, + "args": { + "External id": 980036,"Sequence number": 10552282, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2627 + } + }, + { + "ph": "f", "id": 184, "pid": 2338706, "tid": 2379422, "ts": 6345937925741.741, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937925852.006, "dur": 105.608, + "args": { + "External id": 980037,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937926001.379, "dur": 99.810, + "args": { + "External id": 980038,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345937926124.604, "dur": 58.042, + "args": { + "External id": 980039,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937926195.985, "dur": 32.835, + "args": { + "External id": 980040,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937926235.624, "dur": 33.190, + "args": { + "External id": 980041,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937926275.981, "dur": 27.059, + "args": { + "External id": 980042,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937926310.395, "dur": 29.129, + "args": { + "External id": 980043,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937926372.101, "dur": 26.769, + "args": { + "External id": 980044,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937926420.923, "dur": 31.621, + "args": { + "External id": 980045,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937926477.299, "dur": 21.678, + "args": { + "External id": 980046,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937926514.500, "dur": 17.086, + "args": { + "External id": 980047,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937926541.153, "dur": 38.457, + "args": { + "External id": 980048,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937926583.739, "dur": 33.348, + "args": { + "External id": 980049,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937926649.464, "dur": 265.227, + "args": { + "External id": 980050,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937926734.161, "dur": 6.204, + "args": { + "External id": 980051,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937926742.589, "dur": 3.634, + "args": { + "External id": 980052,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937926747.768, "dur": 2.604, + "args": { + "External id": 980053,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937926751.984, "dur": 2.098, + "args": { + "External id": 980054,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937926803.204, "dur": 5.624, + "args": { + "External id": 980055,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937926805.723, "dur": 2.898, + "args": { + "External id": 980056,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937926811.459, "dur": 36.285, + "args": { + "External id": 980057,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937926817.286, "dur": 4.236, + "args": { + "External id": 980058,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937926849.587, "dur": 2.184, + "args": { + "External id": 980059,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937926850.748, "dur": 0.927, + "args": { + "External id": 980060,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937926853.675, "dur": 14.959, + "args": { + "External id": 980061,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937926855.904, "dur": 0.859, + "args": { + "External id": 980062,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937926953.883, "dur": 26.624, + "args": { + "External id": 980063,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937926997.635, "dur": 35.687, + "args": { + "External id": 980064,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937927045.830, "dur": 83.776, + "args": { + "External id": 980065,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937927140.555, "dur": 42.311, + "args": { + "External id": 980066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937927208.536, "dur": 34.922, + "args": { + "External id": 980067,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937927254.149, "dur": 35.240, + "args": { + "External id": 980068,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937927298.408, "dur": 31.289, + "args": { + "External id": 980069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937927337.486, "dur": 31.892, + "args": { + "External id": 980070,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345937927395.559, "dur": 25.484, + "args": { + "External id": 980071,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937927443.336, "dur": 26.081, + "args": { + "External id": 980072,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937927485.465, "dur": 23.555, + "args": { + "External id": 980073,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937927526.793, "dur": 16.609, + "args": { + "External id": 980074,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345937927557.296, "dur": 18.171, + "args": { + "External id": 980075,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927656.719, "dur": 17.352, + "args": { + "External id": 980076,"Record function id": 0, "Ev Idx": 2667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927660.440, "dur": 12.679, + "args": { + "External id": 980077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927665.197, "dur": 6.860, + "args": { + "External id": 980078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927667.534, "dur": 4.397, + "args": { + "External id": 980079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927678.151, "dur": 6.332, + "args": { + "External id": 980080,"Record function id": 0, "Ev Idx": 2671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927680.060, "dur": 3.950, + "args": { + "External id": 980081,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927681.311, "dur": 2.151, + "args": { + "External id": 980082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927682.064, "dur": 1.282, + "args": { + "External id": 980083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927687.796, "dur": 7.527, + "args": { + "External id": 980084,"Record function id": 0, "Ev Idx": 2675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927689.599, "dur": 5.263, + "args": { + "External id": 980085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927690.365, "dur": 4.053, + "args": { + "External id": 980086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927690.894, "dur": 3.456, + "args": { + "External id": 980087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927698.826, "dur": 4.270, + "args": { + "External id": 980088,"Record function id": 0, "Ev Idx": 2679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927700.319, "dur": 2.304, + "args": { + "External id": 980089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927701.057, "dur": 1.154, + "args": { + "External id": 980090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927701.455, "dur": 0.689, + "args": { + "External id": 980091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927706.508, "dur": 4.141, + "args": { + "External id": 980092,"Record function id": 0, "Ev Idx": 2683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927707.842, "dur": 2.383, + "args": { + "External id": 980093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927708.479, "dur": 1.271, + "args": { + "External id": 980094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927709.024, "dur": 0.661, + "args": { + "External id": 980095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927713.866, "dur": 4.482, + "args": { + "External id": 980096,"Record function id": 0, "Ev Idx": 2687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927715.273, "dur": 2.616, + "args": { + "External id": 980097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927716.128, "dur": 1.274, + "args": { + "External id": 980098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927716.606, "dur": 0.695, + "args": { + "External id": 980099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927722.099, "dur": 4.198, + "args": { + "External id": 980100,"Record function id": 0, "Ev Idx": 2691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927723.426, "dur": 2.473, + "args": { + "External id": 980101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927723.970, "dur": 1.360, + "args": { + "External id": 980102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927724.491, "dur": 0.758, + "args": { + "External id": 980103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927729.619, "dur": 3.824, + "args": { + "External id": 980104,"Record function id": 0, "Ev Idx": 2695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927730.877, "dur": 2.152, + "args": { + "External id": 980105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927731.571, "dur": 1.037, + "args": { + "External id": 980106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927731.826, "dur": 0.676, + "args": { + "External id": 980107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927736.622, "dur": 4.461, + "args": { + "External id": 980108,"Record function id": 0, "Ev Idx": 2699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937927737.962, "dur": 2.700, + "args": { + "External id": 980109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927738.433, "dur": 1.698, + "args": { + "External id": 980110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937927739.323, "dur": 0.695, + "args": { + "External id": 980111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937927745.009, "dur": 62365.777, + "args": { + "External id": 980112,"Record function id": 0, "Sequence number": 10552281, "Fwd thread id": 1, "Ev Idx": 2703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937927746.718, "dur": 62353.544, + "args": { + "External id": 980113,"Sequence number": 10552281, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2704 + } + }, + { + "ph": "f", "id": 185, "pid": 2338706, "tid": 2379422, "ts": 6345937927746.718, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 2338706, "tid": 2379422, + "ts": 6345937927775.685, "dur": 40.422, + "args": { + "External id": 980114,"Record function id": 0, "Ev Idx": 2705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 2338706, "tid": 2379422, + "ts": 6345937927824.177, "dur": 67.958, + "args": { + "External id": 980115,"Record function id": 0, "Ev Idx": 2706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 2338706, "tid": 2379422, + "ts": 6345937927897.764, "dur": 62191.904, + "args": { + "External id": 980116,"Record function id": 0, "Ev Idx": 2707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937927988.470, "dur": 6.865, + "args": { + "External id": 980117,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937928005.486, "dur": 25.442, + "args": { + "External id": 980118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345937928049.933, "dur": 61123.521, + "args": { + "External id": 980119,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345937928101.002, "dur": 61057.671, + "args": { + "External id": 980120,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937928225.421, "dur": 20.474, + "args": { + "External id": 980121,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937928266.119, "dur": 60842.041, + "args": { + "External id": 980122,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937928269.631, "dur": 60837.526, + "args": { + "External id": 980123,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937928274.453, "dur": 12.380, + "args": { + "External id": 980124,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937928289.282, "dur": 60812.537, + "args": { + "External id": 980125,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937989285.982, "dur": 13.541, + "args": { + "External id": 980126,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937989290.480, "dur": 8.579, + "args": { + "External id": 980127,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345937989332.942, "dur": 374.460, + "args": { + "External id": 980128,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937989361.553, "dur": 340.984, + "args": { + "External id": 980129,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2720, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345937989373.153, "dur": 324.462, + "args": { + "External id": 980130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937989726.122, "dur": 2.378, + "args": { + "External id": 980131,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2722, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937989786.353, "dur": 7.107, + "args": { + "External id": 980132,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937989843.628, "dur": 1.809, + "args": { + "External id": 980133,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937989862.494, "dur": 2.807, + "args": { + "External id": 980134,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937989878.148, "dur": 1.104, + "args": { + "External id": 980135,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937989891.978, "dur": 1.051, + "args": { + "External id": 980136,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937989904.902, "dur": 1.361, + "args": { + "External id": 980137,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937989918.900, "dur": 3.636, + "args": { + "External id": 980138,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937989935.967, "dur": 3.863, + "args": { + "External id": 980139,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937989970.971, "dur": 1.454, + "args": { + "External id": 980140,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937990128.962, "dur": 2981.615, + "args": { + "External id": 980141,"Record function id": 0, "Ev Idx": 2732 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 2338706, "tid": 2379422, + "ts": 6345937990152.151, "dur": 1118.860, + "args": { + "External id": 980142,"Record function id": 0, "Ev Idx": 2733 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338706, "tid": 2379422, + "ts": 6345937990167.969, "dur": 344.836, + "args": { + "External id": 980143,"Record function id": 0, "Ev Idx": 2734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937990264.927, "dur": 5.182, + "args": { + "External id": 980144,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937990273.508, "dur": 1.746, + "args": { + "External id": 980145,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937990277.287, "dur": 3.647, + "args": { + "External id": 980146,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937990282.453, "dur": 1.613, + "args": { + "External id": 980147,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937990285.827, "dur": 1.321, + "args": { + "External id": 980148,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937990288.847, "dur": 1.142, + "args": { + "External id": 980149,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937990291.942, "dur": 2.116, + "args": { + "External id": 980150,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937990295.569, "dur": 0.766, + "args": { + "External id": 980151,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937990298.004, "dur": 1.067, + "args": { + "External id": 980152,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937990300.786, "dur": 1.020, + "args": { + "External id": 980153,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937990320.178, "dur": 159.859, + "args": { + "External id": 980154,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937990337.351, "dur": 138.309, + "args": { + "External id": 980155,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937990355.308, "dur": 18.183, + "args": { + "External id": 980156,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937990377.742, "dur": 67.075, + "args": { + "External id": 980157,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937990380.922, "dur": 63.539, + "args": { + "External id": 980158,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990385.147, "dur": 6.369, + "args": { + "External id": 980159,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937990393.660, "dur": 50.095, + "args": { + "External id": 980160,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2751 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 2338706, "tid": 2379422, + "ts": 6345937990604.505, "dur": 657.602, + "args": { + "External id": 980161,"Record function id": 0, "Ev Idx": 2752 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338706, "tid": 2379422, + "ts": 6345937990622.303, "dur": 625.902, + "args": { + "External id": 980162,"Record function id": 0, "Ev Idx": 2753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937990684.167, "dur": 5.187, + "args": { + "External id": 980163,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345937990705.274, "dur": 34.162, + "args": { + "External id": 980164,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990711.303, "dur": 1.593, + "args": { + "External id": 980165,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990715.515, "dur": 0.534, + "args": { + "External id": 980166,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990718.074, "dur": 0.569, + "args": { + "External id": 980167,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990720.496, "dur": 0.538, + "args": { + "External id": 980168,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990722.984, "dur": 0.454, + "args": { + "External id": 980169,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990725.360, "dur": 2.463, + "args": { + "External id": 980170,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990729.832, "dur": 0.428, + "args": { + "External id": 980171,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990732.127, "dur": 0.579, + "args": { + "External id": 980172,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990734.609, "dur": 0.494, + "args": { + "External id": 980173,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937990754.111, "dur": 46.266, + "args": { + "External id": 980174,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345937990833.058, "dur": 118.166, + "args": { + "External id": 980175,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937990843.052, "dur": 3.519, + "args": { + "External id": 980176,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345937990852.392, "dur": 11.231, + "args": { + "External id": 980177,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345937990856.863, "dur": 6.332, + "args": { + "External id": 980178,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990861.390, "dur": 0.577, + "args": { + "External id": 980179,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345937990870.982, "dur": 28.418, + "args": { + "External id": 980180,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990873.311, "dur": 0.503, + "args": { + "External id": 980181,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990876.477, "dur": 0.490, + "args": { + "External id": 980182,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990878.954, "dur": 2.370, + "args": { + "External id": 980183,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990883.230, "dur": 0.388, + "args": { + "External id": 980184,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990885.316, "dur": 0.322, + "args": { + "External id": 980185,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990887.503, "dur": 0.347, + "args": { + "External id": 980186,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990889.532, "dur": 0.479, + "args": { + "External id": 980187,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990891.859, "dur": 0.533, + "args": { + "External id": 980188,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937990894.218, "dur": 0.339, + "args": { + "External id": 980189,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937990911.617, "dur": 32.441, + "args": { + "External id": 980190,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345937990998.063, "dur": 175.363, + "args": { + "External id": 980191,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937991041.487, "dur": 127.899, + "args": { + "External id": 980192,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2783, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345937991086.176, "dur": 78.687, + "args": { + "External id": 980193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345937991189.584, "dur": 2.211, + "args": { + "External id": 980194,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2785, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937991278.830, "dur": 1805.447, + "args": { + "External id": 980195,"Sequence number": 10552280, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2786 + } + }, + { + "ph": "f", "id": 186, "pid": 2338706, "tid": 2379422, "ts": 6345937991278.830, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937991395.052, "dur": 103.911, + "args": { + "External id": 980196,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937991539.090, "dur": 42.204, + "args": { + "External id": 980197,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345937991600.613, "dur": 49.894, + "args": { + "External id": 980198,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937991662.879, "dur": 31.771, + "args": { + "External id": 980199,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937991702.867, "dur": 32.552, + "args": { + "External id": 980200,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937991744.099, "dur": 28.901, + "args": { + "External id": 980201,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937991781.159, "dur": 30.027, + "args": { + "External id": 980202,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937991836.603, "dur": 24.637, + "args": { + "External id": 980203,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345937991880.865, "dur": 29.545, + "args": { + "External id": 980204,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937991935.960, "dur": 21.233, + "args": { + "External id": 980205,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937991970.929, "dur": 15.377, + "args": { + "External id": 980206,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937991999.816, "dur": 93.340, + "args": { + "External id": 980207,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937992100.192, "dur": 37.509, + "args": { + "External id": 980208,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345937992174.710, "dur": 266.698, + "args": { + "External id": 980209,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937992260.716, "dur": 6.392, + "args": { + "External id": 980210,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937992269.563, "dur": 3.412, + "args": { + "External id": 980211,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937992274.588, "dur": 2.169, + "args": { + "External id": 980212,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937992278.260, "dur": 2.205, + "args": { + "External id": 980213,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937992328.621, "dur": 5.507, + "args": { + "External id": 980214,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937992330.910, "dur": 3.031, + "args": { + "External id": 980215,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937992336.710, "dur": 36.051, + "args": { + "External id": 980216,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937992342.742, "dur": 4.057, + "args": { + "External id": 980217,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345937992374.555, "dur": 2.276, + "args": { + "External id": 980218,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937992375.736, "dur": 1.016, + "args": { + "External id": 980219,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345937992378.945, "dur": 15.279, + "args": { + "External id": 980220,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937992381.166, "dur": 0.596, + "args": { + "External id": 980221,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345937992483.308, "dur": 28.574, + "args": { + "External id": 980222,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937992528.903, "dur": 17.192, + "args": { + "External id": 980223,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937992555.147, "dur": 41.057, + "args": { + "External id": 980224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937992604.219, "dur": 38.584, + "args": { + "External id": 980225,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937992654.096, "dur": 21.686, + "args": { + "External id": 980226,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937992682.542, "dur": 30.583, + "args": { + "External id": 980227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937992721.521, "dur": 43.391, + "args": { + "External id": 980228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345937992779.842, "dur": 36.147, + "args": { + "External id": 980229,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345937992836.515, "dur": 24.366, + "args": { + "External id": 980230,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937992878.032, "dur": 27.791, + "args": { + "External id": 980231,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345937992921.883, "dur": 16.893, + "args": { + "External id": 980232,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345937992959.161, "dur": 13.695, + "args": { + "External id": 980233,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345937992986.095, "dur": 16.853, + "args": { + "External id": 980234,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993134.530, "dur": 16.691, + "args": { + "External id": 980235,"Record function id": 0, "Ev Idx": 2826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993138.277, "dur": 11.868, + "args": { + "External id": 980236,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993143.007, "dur": 6.198, + "args": { + "External id": 980237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993144.982, "dur": 4.104, + "args": { + "External id": 980238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993155.284, "dur": 6.234, + "args": { + "External id": 980239,"Record function id": 0, "Ev Idx": 2830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993157.218, "dur": 3.800, + "args": { + "External id": 980240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993158.174, "dur": 2.361, + "args": { + "External id": 980241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993159.206, "dur": 1.184, + "args": { + "External id": 980242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993165.004, "dur": 7.143, + "args": { + "External id": 980243,"Record function id": 0, "Ev Idx": 2834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993166.292, "dur": 5.433, + "args": { + "External id": 980244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993166.952, "dur": 4.323, + "args": { + "External id": 980245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993167.427, "dur": 3.761, + "args": { + "External id": 980246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993175.612, "dur": 4.882, + "args": { + "External id": 980247,"Record function id": 0, "Ev Idx": 2838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993177.265, "dur": 2.813, + "args": { + "External id": 980248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993178.029, "dur": 1.634, + "args": { + "External id": 980249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993178.612, "dur": 0.984, + "args": { + "External id": 980250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993183.983, "dur": 4.566, + "args": { + "External id": 980251,"Record function id": 0, "Ev Idx": 2842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993185.324, "dur": 2.795, + "args": { + "External id": 980252,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993186.175, "dur": 1.340, + "args": { + "External id": 980253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993186.640, "dur": 0.810, + "args": { + "External id": 980254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993192.213, "dur": 4.368, + "args": { + "External id": 980255,"Record function id": 0, "Ev Idx": 2846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993193.492, "dur": 2.596, + "args": { + "External id": 980256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993194.108, "dur": 1.336, + "args": { + "External id": 980257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993194.657, "dur": 0.686, + "args": { + "External id": 980258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993199.980, "dur": 3.710, + "args": { + "External id": 980259,"Record function id": 0, "Ev Idx": 2850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993201.154, "dur": 2.110, + "args": { + "External id": 980260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993201.813, "dur": 1.015, + "args": { + "External id": 980261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993202.106, "dur": 0.623, + "args": { + "External id": 980262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993207.196, "dur": 3.901, + "args": { + "External id": 980263,"Record function id": 0, "Ev Idx": 2854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993208.239, "dur": 2.458, + "args": { + "External id": 980264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993208.879, "dur": 1.431, + "args": { + "External id": 980265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993209.339, "dur": 0.900, + "args": { + "External id": 980266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993214.488, "dur": 4.399, + "args": { + "External id": 980267,"Record function id": 0, "Ev Idx": 2858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345937993215.705, "dur": 2.773, + "args": { + "External id": 980268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993216.477, "dur": 1.540, + "args": { + "External id": 980269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345937993217.277, "dur": 0.644, + "args": { + "External id": 980270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937993223.106, "dur": 62562.907, + "args": { + "External id": 980271,"Record function id": 0, "Sequence number": 10552279, "Fwd thread id": 1, "Ev Idx": 2862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345937993224.318, "dur": 62551.957, + "args": { + "External id": 980272,"Sequence number": 10552279, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2863 + } + }, + { + "ph": "f", "id": 187, "pid": 2338706, "tid": 2379422, "ts": 6345937993224.318, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 2338706, "tid": 2379422, + "ts": 6345937993256.566, "dur": 39.978, + "args": { + "External id": 980273,"Record function id": 0, "Ev Idx": 2864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 2338706, "tid": 2379422, + "ts": 6345937993304.391, "dur": 68.044, + "args": { + "External id": 980274,"Record function id": 0, "Ev Idx": 2865 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 2338706, "tid": 2379422, + "ts": 6345937993378.588, "dur": 62389.403, + "args": { + "External id": 980275,"Record function id": 0, "Ev Idx": 2866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937993471.262, "dur": 7.049, + "args": { + "External id": 980276,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345937993488.632, "dur": 6.933, + "args": { + "External id": 980277,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345937993511.857, "dur": 61337.235, + "args": { + "External id": 980278,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345937993525.970, "dur": 61308.659, + "args": { + "External id": 980279,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345937993657.955, "dur": 18.081, + "args": { + "External id": 980280,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345937993695.615, "dur": 61089.562, + "args": { + "External id": 980281,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345937993698.915, "dur": 61085.213, + "args": { + "External id": 980282,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345937993703.656, "dur": 10.213, + "args": { + "External id": 980283,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345937993716.267, "dur": 61062.545, + "args": { + "External id": 980284,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938054964.097, "dur": 13.643, + "args": { + "External id": 980285,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938054968.426, "dur": 8.962, + "args": { + "External id": 980286,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938055025.248, "dur": 414.672, + "args": { + "External id": 980287,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938055084.079, "dur": 350.355, + "args": { + "External id": 980288,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2879, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938055098.513, "dur": 330.188, + "args": { + "External id": 980289,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938055462.059, "dur": 2.491, + "args": { + "External id": 980290,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2881, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938055530.951, "dur": 6.844, + "args": { + "External id": 980291,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938055587.494, "dur": 1.534, + "args": { + "External id": 980292,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938055607.702, "dur": 3.806, + "args": { + "External id": 980293,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938055625.914, "dur": 0.920, + "args": { + "External id": 980294,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938055640.639, "dur": 1.070, + "args": { + "External id": 980295,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938055654.789, "dur": 1.032, + "args": { + "External id": 980296,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938055669.398, "dur": 3.237, + "args": { + "External id": 980297,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938055685.916, "dur": 2.254, + "args": { + "External id": 980298,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938055701.128, "dur": 1.128, + "args": { + "External id": 980299,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938055801.722, "dur": 3113.440, + "args": { + "External id": 980300,"Record function id": 0, "Ev Idx": 2891 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 2338706, "tid": 2379422, + "ts": 6345938055823.149, "dur": 1159.210, + "args": { + "External id": 980301,"Record function id": 0, "Ev Idx": 2892 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338706, "tid": 2379422, + "ts": 6345938055838.413, "dur": 438.980, + "args": { + "External id": 980302,"Record function id": 0, "Ev Idx": 2893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938055933.001, "dur": 4.447, + "args": { + "External id": 980303,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938055940.564, "dur": 1.447, + "args": { + "External id": 980304,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938055944.186, "dur": 3.424, + "args": { + "External id": 980305,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938055949.224, "dur": 1.286, + "args": { + "External id": 980306,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938055952.315, "dur": 1.165, + "args": { + "External id": 980307,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938055955.326, "dur": 1.056, + "args": { + "External id": 980308,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938055958.184, "dur": 1.706, + "args": { + "External id": 980309,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938055961.504, "dur": 1.013, + "args": { + "External id": 980310,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938055964.268, "dur": 1.267, + "args": { + "External id": 980311,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938055967.179, "dur": 0.756, + "args": { + "External id": 980312,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938056029.878, "dur": 206.622, + "args": { + "External id": 980313,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938056050.704, "dur": 179.972, + "args": { + "External id": 980314,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938056103.709, "dur": 19.065, + "args": { + "External id": 980315,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938056127.210, "dur": 71.755, + "args": { + "External id": 980316,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938056130.223, "dur": 68.313, + "args": { + "External id": 980317,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056134.875, "dur": 6.231, + "args": { + "External id": 980318,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938056143.072, "dur": 54.894, + "args": { + "External id": 980319,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 2338706, "tid": 2379422, + "ts": 6345938056378.145, "dur": 595.493, + "args": { + "External id": 980320,"Record function id": 0, "Ev Idx": 2911 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338706, "tid": 2379422, + "ts": 6345938056398.540, "dur": 561.969, + "args": { + "External id": 980321,"Record function id": 0, "Ev Idx": 2912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938056466.677, "dur": 5.679, + "args": { + "External id": 980322,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938056489.005, "dur": 35.557, + "args": { + "External id": 980323,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056495.304, "dur": 1.759, + "args": { + "External id": 980324,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056499.455, "dur": 0.691, + "args": { + "External id": 980325,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056502.122, "dur": 0.430, + "args": { + "External id": 980326,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056504.159, "dur": 0.478, + "args": { + "External id": 980327,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056506.563, "dur": 0.498, + "args": { + "External id": 980328,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056509.249, "dur": 2.795, + "args": { + "External id": 980329,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056514.013, "dur": 0.451, + "args": { + "External id": 980330,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056516.482, "dur": 0.587, + "args": { + "External id": 980331,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056519.117, "dur": 0.577, + "args": { + "External id": 980332,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938056536.350, "dur": 45.689, + "args": { + "External id": 980333,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345938056614.840, "dur": 115.068, + "args": { + "External id": 980334,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938056625.810, "dur": 3.760, + "args": { + "External id": 980335,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345938056635.213, "dur": 11.113, + "args": { + "External id": 980336,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345938056639.838, "dur": 6.055, + "args": { + "External id": 980337,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056643.882, "dur": 0.642, + "args": { + "External id": 980338,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938056653.122, "dur": 28.384, + "args": { + "External id": 980339,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056655.491, "dur": 0.618, + "args": { + "External id": 980340,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056658.105, "dur": 0.480, + "args": { + "External id": 980341,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056660.591, "dur": 2.531, + "args": { + "External id": 980342,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056664.849, "dur": 0.573, + "args": { + "External id": 980343,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056667.634, "dur": 0.384, + "args": { + "External id": 980344,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056669.981, "dur": 0.317, + "args": { + "External id": 980345,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056671.995, "dur": 0.504, + "args": { + "External id": 980346,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056674.313, "dur": 0.388, + "args": { + "External id": 980347,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938056676.654, "dur": 0.394, + "args": { + "External id": 980348,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938056691.451, "dur": 30.448, + "args": { + "External id": 980349,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938056776.451, "dur": 117.534, + "args": { + "External id": 980350,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938056800.700, "dur": 89.473, + "args": { + "External id": 980351,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2942, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938056811.162, "dur": 74.611, + "args": { + "External id": 980352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938056908.126, "dur": 1.835, + "args": { + "External id": 980353,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2944, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938056989.562, "dur": 1899.521, + "args": { + "External id": 980354,"Sequence number": 10552278, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2945 + } + }, + { + "ph": "f", "id": 188, "pid": 2338706, "tid": 2379422, "ts": 6345938056989.562, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938057166.065, "dur": 111.158, + "args": { + "External id": 980355,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938057320.360, "dur": 44.096, + "args": { + "External id": 980356,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345938057383.983, "dur": 50.184, + "args": { + "External id": 980357,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938057446.451, "dur": 32.140, + "args": { + "External id": 980358,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938057486.398, "dur": 33.934, + "args": { + "External id": 980359,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938057528.616, "dur": 28.244, + "args": { + "External id": 980360,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938057565.610, "dur": 29.287, + "args": { + "External id": 980361,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938057621.817, "dur": 24.004, + "args": { + "External id": 980362,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938057684.730, "dur": 29.816, + "args": { + "External id": 980363,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938057738.409, "dur": 18.877, + "args": { + "External id": 980364,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938057772.165, "dur": 14.799, + "args": { + "External id": 980365,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938057796.835, "dur": 36.206, + "args": { + "External id": 980366,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938057837.214, "dur": 33.434, + "args": { + "External id": 980367,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345938057904.182, "dur": 354.607, + "args": { + "External id": 980368,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938057986.592, "dur": 6.704, + "args": { + "External id": 980369,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938057995.622, "dur": 2.771, + "args": { + "External id": 980370,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938058044.645, "dur": 4.037, + "args": { + "External id": 980371,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938058051.434, "dur": 34.947, + "args": { + "External id": 980372,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938058138.115, "dur": 5.632, + "args": { + "External id": 980373,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938058140.521, "dur": 3.050, + "args": { + "External id": 980374,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938058146.535, "dur": 35.884, + "args": { + "External id": 980375,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938058152.885, "dur": 4.022, + "args": { + "External id": 980376,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938058184.442, "dur": 1.957, + "args": { + "External id": 980377,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938058185.658, "dur": 0.667, + "args": { + "External id": 980378,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938058188.292, "dur": 15.671, + "args": { + "External id": 980379,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938058190.769, "dur": 0.768, + "args": { + "External id": 980380,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938058301.655, "dur": 30.168, + "args": { + "External id": 980381,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938058350.929, "dur": 17.079, + "args": { + "External id": 980382,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938058377.450, "dur": 53.640, + "args": { + "External id": 980383,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938058439.069, "dur": 56.065, + "args": { + "External id": 980384,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938058514.574, "dur": 29.109, + "args": { + "External id": 980385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938058550.668, "dur": 34.940, + "args": { + "External id": 980386,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938058593.943, "dur": 28.980, + "args": { + "External id": 980387,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938058630.665, "dur": 31.162, + "args": { + "External id": 980388,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345938058682.917, "dur": 24.291, + "args": { + "External id": 980389,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938058727.122, "dur": 27.756, + "args": { + "External id": 980390,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938058769.541, "dur": 23.255, + "args": { + "External id": 980391,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938058809.693, "dur": 17.062, + "args": { + "External id": 980392,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345938058839.594, "dur": 18.397, + "args": { + "External id": 980393,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938058939.084, "dur": 16.678, + "args": { + "External id": 980394,"Record function id": 0, "Ev Idx": 2985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938058942.532, "dur": 12.242, + "args": { + "External id": 980395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938058947.152, "dur": 6.653, + "args": { + "External id": 980396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938058949.484, "dur": 4.193, + "args": { + "External id": 980397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938058959.785, "dur": 6.119, + "args": { + "External id": 980398,"Record function id": 0, "Ev Idx": 2989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938058961.321, "dur": 4.099, + "args": { + "External id": 980399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938058962.525, "dur": 2.305, + "args": { + "External id": 980400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938058963.490, "dur": 1.198, + "args": { + "External id": 980401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938058969.190, "dur": 7.090, + "args": { + "External id": 980402,"Record function id": 0, "Ev Idx": 2993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938058970.647, "dur": 5.209, + "args": { + "External id": 980403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938058971.334, "dur": 4.051, + "args": { + "External id": 980404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938058972.023, "dur": 3.302, + "args": { + "External id": 980405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938058979.802, "dur": 4.878, + "args": { + "External id": 980406,"Record function id": 0, "Ev Idx": 2997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938058981.326, "dur": 2.905, + "args": { + "External id": 980407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938058982.260, "dur": 1.557, + "args": { + "External id": 980408,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938058982.742, "dur": 1.015, + "args": { + "External id": 980409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938058988.187, "dur": 4.216, + "args": { + "External id": 980410,"Record function id": 0, "Ev Idx": 3001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938058989.465, "dur": 2.480, + "args": { + "External id": 980411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938058990.150, "dur": 1.335, + "args": { + "External id": 980412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938058990.694, "dur": 0.718, + "args": { + "External id": 980413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938058995.798, "dur": 4.426, + "args": { + "External id": 980414,"Record function id": 0, "Ev Idx": 3005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938058997.160, "dur": 2.587, + "args": { + "External id": 980415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938058997.828, "dur": 1.310, + "args": { + "External id": 980416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938058998.360, "dur": 0.658, + "args": { + "External id": 980417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938059003.563, "dur": 22.897, + "args": { + "External id": 980418,"Record function id": 0, "Ev Idx": 3009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938059004.753, "dur": 2.332, + "args": { + "External id": 980419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938059005.571, "dur": 1.019, + "args": { + "External id": 980420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938059005.875, "dur": 0.648, + "args": { + "External id": 980421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938059032.570, "dur": 6.030, + "args": { + "External id": 980422,"Record function id": 0, "Ev Idx": 3013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938059034.151, "dur": 3.994, + "args": { + "External id": 980423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938059035.401, "dur": 1.950, + "args": { + "External id": 980424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938059036.063, "dur": 1.208, + "args": { + "External id": 980425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938059041.948, "dur": 4.733, + "args": { + "External id": 980426,"Record function id": 0, "Ev Idx": 3017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938059043.252, "dur": 3.018, + "args": { + "External id": 980427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938059044.072, "dur": 1.644, + "args": { + "External id": 980428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938059045.048, "dur": 0.569, + "args": { + "External id": 980429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938059050.826, "dur": 65239.071, + "args": { + "External id": 980430,"Record function id": 0, "Sequence number": 10552277, "Fwd thread id": 1, "Ev Idx": 3021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938059088.898, "dur": 65190.106, + "args": { + "External id": 980431,"Sequence number": 10552277, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3022 + } + }, + { + "ph": "f", "id": 189, "pid": 2338706, "tid": 2379422, "ts": 6345938059088.898, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 2338706, "tid": 2379422, + "ts": 6345938059124.246, "dur": 44.943, + "args": { + "External id": 980432,"Record function id": 0, "Ev Idx": 3023 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 2338706, "tid": 2379422, + "ts": 6345938059177.748, "dur": 69.054, + "args": { + "External id": 980433,"Record function id": 0, "Ev Idx": 3024 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 2338706, "tid": 2379422, + "ts": 6345938059252.572, "dur": 65016.712, + "args": { + "External id": 980434,"Record function id": 0, "Ev Idx": 3025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938059346.886, "dur": 7.649, + "args": { + "External id": 980435,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938059365.458, "dur": 7.319, + "args": { + "External id": 980436,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938059389.027, "dur": 63920.620, + "args": { + "External id": 980437,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938059403.317, "dur": 63892.058, + "args": { + "External id": 980438,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938059507.715, "dur": 18.324, + "args": { + "External id": 980439,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938059546.101, "dur": 63698.739, + "args": { + "External id": 980440,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938059549.646, "dur": 63694.047, + "args": { + "External id": 980441,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938059554.900, "dur": 9.873, + "args": { + "External id": 980442,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938059567.500, "dur": 63670.702, + "args": { + "External id": 980443,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938123422.682, "dur": 14.058, + "args": { + "External id": 980444,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938123427.108, "dur": 9.200, + "args": { + "External id": 980445,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938123470.085, "dur": 419.779, + "args": { + "External id": 980446,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938123500.702, "dur": 383.447, + "args": { + "External id": 980447,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3038, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938123513.317, "dur": 365.326, + "args": { + "External id": 980448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938123909.915, "dur": 2.531, + "args": { + "External id": 980449,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3040, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938123973.909, "dur": 7.035, + "args": { + "External id": 980450,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938124082.810, "dur": 3.522, + "args": { + "External id": 980451,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938124110.519, "dur": 3.813, + "args": { + "External id": 980452,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938124128.107, "dur": 1.133, + "args": { + "External id": 980453,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938124142.337, "dur": 0.993, + "args": { + "External id": 980454,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938124155.868, "dur": 0.937, + "args": { + "External id": 980455,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938124170.330, "dur": 3.011, + "args": { + "External id": 980456,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938124186.756, "dur": 2.411, + "args": { + "External id": 980457,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938124202.281, "dur": 1.163, + "args": { + "External id": 980458,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938124307.440, "dur": 3257.659, + "args": { + "External id": 980459,"Record function id": 0, "Ev Idx": 3050 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 2338706, "tid": 2379422, + "ts": 6345938124328.888, "dur": 1294.231, + "args": { + "External id": 980460,"Record function id": 0, "Ev Idx": 3051 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338706, "tid": 2379422, + "ts": 6345938124344.484, "dur": 376.248, + "args": { + "External id": 980461,"Record function id": 0, "Ev Idx": 3052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938124444.916, "dur": 4.356, + "args": { + "External id": 980462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938124452.753, "dur": 1.280, + "args": { + "External id": 980463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938124456.151, "dur": 3.405, + "args": { + "External id": 980464,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938124461.411, "dur": 1.098, + "args": { + "External id": 980465,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938124464.110, "dur": 0.887, + "args": { + "External id": 980466,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938124466.838, "dur": 1.062, + "args": { + "External id": 980467,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938124469.834, "dur": 2.578, + "args": { + "External id": 980468,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938124474.269, "dur": 0.982, + "args": { + "External id": 980469,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938124477.084, "dur": 0.980, + "args": { + "External id": 980470,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938124479.849, "dur": 0.976, + "args": { + "External id": 980471,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938124525.654, "dur": 162.306, + "args": { + "External id": 980472,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938124544.055, "dur": 139.318, + "args": { + "External id": 980473,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938124563.133, "dur": 18.707, + "args": { + "External id": 980474,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938124585.965, "dur": 67.827, + "args": { + "External id": 980475,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938124589.193, "dur": 64.154, + "args": { + "External id": 980476,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938124593.528, "dur": 5.384, + "args": { + "External id": 980477,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938124600.898, "dur": 51.830, + "args": { + "External id": 980478,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3069 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 2338706, "tid": 2379422, + "ts": 6345938124817.039, "dur": 796.768, + "args": { + "External id": 980479,"Record function id": 0, "Ev Idx": 3070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338706, "tid": 2379422, + "ts": 6345938124836.923, "dur": 762.578, + "args": { + "External id": 980480,"Record function id": 0, "Ev Idx": 3071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938124992.567, "dur": 32.325, + "args": { + "External id": 980481,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938125045.405, "dur": 72.455, + "args": { + "External id": 980482,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125051.799, "dur": 35.092, + "args": { + "External id": 980483,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125090.867, "dur": 0.801, + "args": { + "External id": 980484,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125093.404, "dur": 0.497, + "args": { + "External id": 980485,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125095.773, "dur": 0.427, + "args": { + "External id": 980486,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125098.657, "dur": 0.648, + "args": { + "External id": 980487,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125101.230, "dur": 2.793, + "args": { + "External id": 980488,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125106.156, "dur": 0.480, + "args": { + "External id": 980489,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125108.347, "dur": 0.438, + "args": { + "External id": 980490,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125110.675, "dur": 0.563, + "args": { + "External id": 980491,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938125130.107, "dur": 56.577, + "args": { + "External id": 980492,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345938125224.632, "dur": 127.326, + "args": { + "External id": 980493,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938125237.684, "dur": 5.353, + "args": { + "External id": 980494,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345938125248.908, "dur": 10.995, + "args": { + "External id": 980495,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345938125253.463, "dur": 6.004, + "args": { + "External id": 980496,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125257.578, "dur": 0.611, + "args": { + "External id": 980497,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938125267.923, "dur": 28.775, + "args": { + "External id": 980498,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125270.740, "dur": 0.636, + "args": { + "External id": 980499,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125273.428, "dur": 0.481, + "args": { + "External id": 980500,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125276.041, "dur": 2.657, + "args": { + "External id": 980501,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125280.453, "dur": 0.352, + "args": { + "External id": 980502,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125282.424, "dur": 0.467, + "args": { + "External id": 980503,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125284.563, "dur": 0.497, + "args": { + "External id": 980504,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125286.886, "dur": 0.350, + "args": { + "External id": 980505,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125288.867, "dur": 0.368, + "args": { + "External id": 980506,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938125290.864, "dur": 0.440, + "args": { + "External id": 980507,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938125310.464, "dur": 33.261, + "args": { + "External id": 980508,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938125408.124, "dur": 119.139, + "args": { + "External id": 980509,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938125434.641, "dur": 88.612, + "args": { + "External id": 980510,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3101, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938125445.658, "dur": 73.412, + "args": { + "External id": 980511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938125542.463, "dur": 2.153, + "args": { + "External id": 980512,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3103, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938125630.695, "dur": 1910.855, + "args": { + "External id": 980513,"Sequence number": 10552276, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3104 + } + }, + { + "ph": "f", "id": 190, "pid": 2338706, "tid": 2379422, "ts": 6345938125630.695, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938125747.315, "dur": 106.523, + "args": { + "External id": 980514,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938125944.001, "dur": 41.275, + "args": { + "External id": 980515,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345938126006.310, "dur": 118.110, + "args": { + "External id": 980516,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938126143.290, "dur": 36.218, + "args": { + "External id": 980517,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938126188.059, "dur": 35.896, + "args": { + "External id": 980518,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938126233.692, "dur": 29.136, + "args": { + "External id": 980519,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938126271.005, "dur": 30.838, + "args": { + "External id": 980520,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938126331.997, "dur": 27.719, + "args": { + "External id": 980521,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938126381.234, "dur": 31.910, + "args": { + "External id": 980522,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938126436.128, "dur": 21.290, + "args": { + "External id": 980523,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938126471.238, "dur": 18.140, + "args": { + "External id": 980524,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938126498.337, "dur": 38.278, + "args": { + "External id": 980525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938126540.305, "dur": 32.700, + "args": { + "External id": 980526,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345938126606.269, "dur": 266.031, + "args": { + "External id": 980527,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938126692.952, "dur": 6.367, + "args": { + "External id": 980528,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938126701.851, "dur": 2.726, + "args": { + "External id": 980529,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938126706.163, "dur": 1.809, + "args": { + "External id": 980530,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938126709.544, "dur": 2.100, + "args": { + "External id": 980531,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938126761.124, "dur": 5.156, + "args": { + "External id": 980532,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938126763.303, "dur": 2.818, + "args": { + "External id": 980533,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938126768.558, "dur": 33.874, + "args": { + "External id": 980534,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938126774.400, "dur": 3.148, + "args": { + "External id": 980535,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938126804.734, "dur": 2.221, + "args": { + "External id": 980536,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938126806.112, "dur": 0.771, + "args": { + "External id": 980537,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938126808.383, "dur": 16.355, + "args": { + "External id": 980538,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938126811.669, "dur": 0.609, + "args": { + "External id": 980539,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938126909.632, "dur": 40.428, + "args": { + "External id": 980540,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938126975.459, "dur": 20.667, + "args": { + "External id": 980541,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938127005.880, "dur": 106.206, + "args": { + "External id": 980542,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938127123.356, "dur": 45.034, + "args": { + "External id": 980543,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938127183.988, "dur": 20.849, + "args": { + "External id": 980544,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938127211.963, "dur": 31.430, + "args": { + "External id": 980545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938127251.950, "dur": 29.763, + "args": { + "External id": 980546,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938127289.257, "dur": 31.189, + "args": { + "External id": 980547,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345938127340.920, "dur": 25.953, + "args": { + "External id": 980548,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938127383.480, "dur": 26.964, + "args": { + "External id": 980549,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938127424.674, "dur": 19.303, + "args": { + "External id": 980550,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938127459.748, "dur": 16.623, + "args": { + "External id": 980551,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345938127490.141, "dur": 19.073, + "args": { + "External id": 980552,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127589.043, "dur": 16.694, + "args": { + "External id": 980553,"Record function id": 0, "Ev Idx": 3144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127592.843, "dur": 12.068, + "args": { + "External id": 980554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127597.599, "dur": 6.193, + "args": { + "External id": 980555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127599.361, "dur": 4.307, + "args": { + "External id": 980556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127609.666, "dur": 27.270, + "args": { + "External id": 980557,"Record function id": 0, "Ev Idx": 3148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127611.228, "dur": 25.178, + "args": { + "External id": 980558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127612.061, "dur": 23.727, + "args": { + "External id": 980559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127634.304, "dur": 1.341, + "args": { + "External id": 980560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127640.510, "dur": 6.930, + "args": { + "External id": 980561,"Record function id": 0, "Ev Idx": 3152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127642.154, "dur": 4.880, + "args": { + "External id": 980562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127642.999, "dur": 3.558, + "args": { + "External id": 980563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127643.508, "dur": 2.936, + "args": { + "External id": 980564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127650.950, "dur": 5.353, + "args": { + "External id": 980565,"Record function id": 0, "Ev Idx": 3156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127652.490, "dur": 3.393, + "args": { + "External id": 980566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127653.657, "dur": 1.797, + "args": { + "External id": 980567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127654.187, "dur": 1.201, + "args": { + "External id": 980568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127659.638, "dur": 4.176, + "args": { + "External id": 980569,"Record function id": 0, "Ev Idx": 3160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127660.833, "dur": 2.570, + "args": { + "External id": 980570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127661.714, "dur": 1.230, + "args": { + "External id": 980571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127662.087, "dur": 0.794, + "args": { + "External id": 980572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127667.074, "dur": 4.361, + "args": { + "External id": 980573,"Record function id": 0, "Ev Idx": 3164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127668.653, "dur": 2.372, + "args": { + "External id": 980574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127669.179, "dur": 1.422, + "args": { + "External id": 980575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127669.808, "dur": 0.705, + "args": { + "External id": 980576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127674.727, "dur": 5.573, + "args": { + "External id": 980577,"Record function id": 0, "Ev Idx": 3168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127675.919, "dur": 3.994, + "args": { + "External id": 980578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127676.384, "dur": 2.876, + "args": { + "External id": 980579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127678.605, "dur": 0.583, + "args": { + "External id": 980580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127683.690, "dur": 4.030, + "args": { + "External id": 980581,"Record function id": 0, "Ev Idx": 3172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127685.121, "dur": 2.196, + "args": { + "External id": 980582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127685.601, "dur": 1.300, + "args": { + "External id": 980583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127686.148, "dur": 0.680, + "args": { + "External id": 980584,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127690.974, "dur": 4.320, + "args": { + "External id": 980585,"Record function id": 0, "Ev Idx": 3176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938127692.317, "dur": 2.564, + "args": { + "External id": 980586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127692.994, "dur": 1.359, + "args": { + "External id": 980587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938127693.503, "dur": 0.783, + "args": { + "External id": 980588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938127699.340, "dur": 66048.450, + "args": { + "External id": 980589,"Record function id": 0, "Sequence number": 10552275, "Fwd thread id": 1, "Ev Idx": 3180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938127700.617, "dur": 66037.555, + "args": { + "External id": 980590,"Sequence number": 10552275, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3181 + } + }, + { + "ph": "f", "id": 191, "pid": 2338706, "tid": 2379422, "ts": 6345938127700.617, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 2338706, "tid": 2379422, + "ts": 6345938127733.632, "dur": 39.486, + "args": { + "External id": 980591,"Record function id": 0, "Ev Idx": 3182 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 2338706, "tid": 2379422, + "ts": 6345938127780.804, "dur": 68.068, + "args": { + "External id": 980592,"Record function id": 0, "Ev Idx": 3183 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 2338706, "tid": 2379422, + "ts": 6345938127855.126, "dur": 65875.492, + "args": { + "External id": 980593,"Record function id": 0, "Ev Idx": 3184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938127945.882, "dur": 6.841, + "args": { + "External id": 980594,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938127962.750, "dur": 6.415, + "args": { + "External id": 980595,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938127985.301, "dur": 64909.153, + "args": { + "External id": 980596,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938127999.275, "dur": 64880.490, + "args": { + "External id": 980597,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938128147.908, "dur": 21.508, + "args": { + "External id": 980598,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938128190.085, "dur": 64639.891, + "args": { + "External id": 980599,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938128193.542, "dur": 64635.434, + "args": { + "External id": 980600,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938128198.359, "dur": 13.112, + "args": { + "External id": 980601,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938128213.806, "dur": 64610.498, + "args": { + "External id": 980602,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938193020.407, "dur": 15.260, + "args": { + "External id": 980603,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938193025.712, "dur": 9.438, + "args": { + "External id": 980604,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938193101.551, "dur": 316.561, + "args": { + "External id": 980605,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938193131.808, "dur": 281.153, + "args": { + "External id": 980606,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3197, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938193145.305, "dur": 262.093, + "args": { + "External id": 980607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938193437.294, "dur": 2.169, + "args": { + "External id": 980608,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3199, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938193502.662, "dur": 7.033, + "args": { + "External id": 980609,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938193559.824, "dur": 1.812, + "args": { + "External id": 980610,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938193579.599, "dur": 3.487, + "args": { + "External id": 980611,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938193596.650, "dur": 1.175, + "args": { + "External id": 980612,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938193610.313, "dur": 0.951, + "args": { + "External id": 980613,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938193623.400, "dur": 1.241, + "args": { + "External id": 980614,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938193636.684, "dur": 3.050, + "args": { + "External id": 980615,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938193652.754, "dur": 3.016, + "args": { + "External id": 980616,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938193667.640, "dur": 0.835, + "args": { + "External id": 980617,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938193763.156, "dur": 3030.950, + "args": { + "External id": 980618,"Record function id": 0, "Ev Idx": 3209 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 2338706, "tid": 2379422, + "ts": 6345938193784.929, "dur": 1148.855, + "args": { + "External id": 980619,"Record function id": 0, "Ev Idx": 3210 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338706, "tid": 2379422, + "ts": 6345938193799.847, "dur": 427.268, + "args": { + "External id": 980620,"Record function id": 0, "Ev Idx": 3211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938193890.250, "dur": 4.307, + "args": { + "External id": 980621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938193898.026, "dur": 1.427, + "args": { + "External id": 980622,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938193901.474, "dur": 2.983, + "args": { + "External id": 980623,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938193906.308, "dur": 0.945, + "args": { + "External id": 980624,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938193908.719, "dur": 0.814, + "args": { + "External id": 980625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938193911.145, "dur": 0.960, + "args": { + "External id": 980626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938193913.674, "dur": 2.540, + "args": { + "External id": 980627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938193917.779, "dur": 0.817, + "args": { + "External id": 980628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938193919.935, "dur": 1.362, + "args": { + "External id": 980629,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938193922.621, "dur": 0.809, + "args": { + "External id": 980630,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938193942.605, "dur": 244.473, + "args": { + "External id": 980631,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938193982.227, "dur": 198.919, + "args": { + "External id": 980632,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938194001.241, "dur": 36.434, + "args": { + "External id": 980633,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938194042.797, "dur": 106.044, + "args": { + "External id": 980634,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938194045.893, "dur": 102.551, + "args": { + "External id": 980635,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194050.967, "dur": 39.987, + "args": { + "External id": 980636,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938194093.645, "dur": 54.121, + "args": { + "External id": 980637,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3228 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 2338706, "tid": 2379422, + "ts": 6345938194327.123, "dur": 598.234, + "args": { + "External id": 980638,"Record function id": 0, "Ev Idx": 3229 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338706, "tid": 2379422, + "ts": 6345938194346.435, "dur": 565.910, + "args": { + "External id": 980639,"Record function id": 0, "Ev Idx": 3230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938194428.585, "dur": 8.033, + "args": { + "External id": 980640,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938194455.125, "dur": 33.145, + "args": { + "External id": 980641,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194460.291, "dur": 1.659, + "args": { + "External id": 980642,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194464.258, "dur": 0.691, + "args": { + "External id": 980643,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194466.775, "dur": 0.790, + "args": { + "External id": 980644,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194469.477, "dur": 0.480, + "args": { + "External id": 980645,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194471.779, "dur": 0.619, + "args": { + "External id": 980646,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194474.241, "dur": 2.617, + "args": { + "External id": 980647,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194478.593, "dur": 0.451, + "args": { + "External id": 980648,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194480.678, "dur": 0.450, + "args": { + "External id": 980649,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194482.723, "dur": 0.447, + "args": { + "External id": 980650,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938194499.151, "dur": 45.541, + "args": { + "External id": 980651,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345938194576.588, "dur": 115.133, + "args": { + "External id": 980652,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938194587.299, "dur": 3.566, + "args": { + "External id": 980653,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345938194596.337, "dur": 10.984, + "args": { + "External id": 980654,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345938194601.116, "dur": 5.783, + "args": { + "External id": 980655,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194605.039, "dur": 0.666, + "args": { + "External id": 980656,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938194614.635, "dur": 27.557, + "args": { + "External id": 980657,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194616.868, "dur": 0.564, + "args": { + "External id": 980658,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194619.172, "dur": 0.541, + "args": { + "External id": 980659,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194621.532, "dur": 2.878, + "args": { + "External id": 980660,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194625.821, "dur": 0.613, + "args": { + "External id": 980661,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194628.227, "dur": 0.696, + "args": { + "External id": 980662,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194630.394, "dur": 0.419, + "args": { + "External id": 980663,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194632.294, "dur": 0.713, + "args": { + "External id": 980664,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194634.710, "dur": 0.417, + "args": { + "External id": 980665,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938194636.977, "dur": 0.317, + "args": { + "External id": 980666,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938194651.866, "dur": 32.106, + "args": { + "External id": 980667,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938194737.769, "dur": 110.831, + "args": { + "External id": 980668,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938194760.437, "dur": 84.545, + "args": { + "External id": 980669,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3260, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938194770.477, "dur": 70.285, + "args": { + "External id": 980670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938194862.548, "dur": 2.456, + "args": { + "External id": 980671,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3262, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938194941.495, "dur": 1826.886, + "args": { + "External id": 980672,"Sequence number": 10552274, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3263 + } + }, + { + "ph": "f", "id": 192, "pid": 2338706, "tid": 2379422, "ts": 6345938194941.495, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938195116.527, "dur": 114.479, + "args": { + "External id": 980673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938195273.288, "dur": 43.172, + "args": { + "External id": 980674,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345938195335.301, "dur": 50.427, + "args": { + "External id": 980675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938195397.953, "dur": 32.527, + "args": { + "External id": 980676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938195438.335, "dur": 35.393, + "args": { + "External id": 980677,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938195481.515, "dur": 27.787, + "args": { + "External id": 980678,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938195516.833, "dur": 31.952, + "args": { + "External id": 980679,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938195574.638, "dur": 24.888, + "args": { + "External id": 980680,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938195618.573, "dur": 29.613, + "args": { + "External id": 980681,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938195668.238, "dur": 20.308, + "args": { + "External id": 980682,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938195702.875, "dur": 15.334, + "args": { + "External id": 980683,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938195727.174, "dur": 35.297, + "args": { + "External id": 980684,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938195766.195, "dur": 32.249, + "args": { + "External id": 980685,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345938195829.523, "dur": 325.913, + "args": { + "External id": 980686,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938195911.909, "dur": 6.233, + "args": { + "External id": 980687,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938195920.428, "dur": 2.869, + "args": { + "External id": 980688,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938195924.523, "dur": 2.116, + "args": { + "External id": 980689,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938195927.950, "dur": 1.776, + "args": { + "External id": 980690,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938195979.174, "dur": 5.055, + "args": { + "External id": 980691,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938195981.330, "dur": 2.721, + "args": { + "External id": 980692,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938195986.536, "dur": 54.881, + "args": { + "External id": 980693,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938195991.723, "dur": 3.194, + "args": { + "External id": 980694,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938196044.713, "dur": 2.329, + "args": { + "External id": 980695,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938196045.912, "dur": 1.035, + "args": { + "External id": 980696,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938196048.689, "dur": 56.060, + "args": { + "External id": 980697,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938196051.191, "dur": 0.783, + "args": { + "External id": 980698,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938196196.355, "dur": 29.016, + "args": { + "External id": 980699,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938196242.948, "dur": 19.617, + "args": { + "External id": 980700,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938196271.612, "dur": 51.967, + "args": { + "External id": 980701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938196330.722, "dur": 41.490, + "args": { + "External id": 980702,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938196383.639, "dur": 22.310, + "args": { + "External id": 980703,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938196412.340, "dur": 32.738, + "args": { + "External id": 980704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938196453.186, "dur": 47.496, + "args": { + "External id": 980705,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938196516.256, "dur": 33.934, + "args": { + "External id": 980706,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345938196570.917, "dur": 24.479, + "args": { + "External id": 980707,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938196612.843, "dur": 25.422, + "args": { + "External id": 980708,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938196652.063, "dur": 20.969, + "args": { + "External id": 980709,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938196689.931, "dur": 15.061, + "args": { + "External id": 980710,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345938196717.799, "dur": 16.070, + "args": { + "External id": 980711,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196820.541, "dur": 15.897, + "args": { + "External id": 980712,"Record function id": 0, "Ev Idx": 3303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196824.059, "dur": 11.386, + "args": { + "External id": 980713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196828.561, "dur": 5.972, + "args": { + "External id": 980714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196830.214, "dur": 4.197, + "args": { + "External id": 980715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196840.405, "dur": 5.524, + "args": { + "External id": 980716,"Record function id": 0, "Ev Idx": 3307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196842.070, "dur": 3.428, + "args": { + "External id": 980717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196842.998, "dur": 1.970, + "args": { + "External id": 980718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196843.761, "dur": 1.100, + "args": { + "External id": 980719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196849.539, "dur": 6.343, + "args": { + "External id": 980720,"Record function id": 0, "Ev Idx": 3311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196850.524, "dur": 4.936, + "args": { + "External id": 980721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196851.133, "dur": 3.857, + "args": { + "External id": 980722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196851.678, "dur": 3.244, + "args": { + "External id": 980723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196859.438, "dur": 4.284, + "args": { + "External id": 980724,"Record function id": 0, "Ev Idx": 3315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196860.787, "dur": 2.502, + "args": { + "External id": 980725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196861.427, "dur": 1.313, + "args": { + "External id": 980726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196861.754, "dur": 0.917, + "args": { + "External id": 980727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196867.011, "dur": 4.442, + "args": { + "External id": 980728,"Record function id": 0, "Ev Idx": 3319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196868.230, "dur": 2.802, + "args": { + "External id": 980729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196868.841, "dur": 1.708, + "args": { + "External id": 980730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196869.552, "dur": 0.933, + "args": { + "External id": 980731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196874.762, "dur": 4.188, + "args": { + "External id": 980732,"Record function id": 0, "Ev Idx": 3323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196875.852, "dur": 2.684, + "args": { + "External id": 980733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196876.539, "dur": 1.531, + "args": { + "External id": 980734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196877.032, "dur": 0.932, + "args": { + "External id": 980735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196882.120, "dur": 3.416, + "args": { + "External id": 980736,"Record function id": 0, "Ev Idx": 3327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196883.169, "dur": 1.984, + "args": { + "External id": 980737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196883.671, "dur": 1.038, + "args": { + "External id": 980738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196883.949, "dur": 0.666, + "args": { + "External id": 980739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196888.679, "dur": 3.786, + "args": { + "External id": 980740,"Record function id": 0, "Ev Idx": 3331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196889.890, "dur": 2.162, + "args": { + "External id": 980741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196890.476, "dur": 1.039, + "args": { + "External id": 980742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196890.852, "dur": 0.585, + "args": { + "External id": 980743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196895.600, "dur": 3.919, + "args": { + "External id": 980744,"Record function id": 0, "Ev Idx": 3335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938196896.885, "dur": 2.230, + "args": { + "External id": 980745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196897.358, "dur": 1.240, + "args": { + "External id": 980746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938196897.917, "dur": 0.586, + "args": { + "External id": 980747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938196903.298, "dur": 64563.945, + "args": { + "External id": 980748,"Record function id": 0, "Sequence number": 10552273, "Fwd thread id": 1, "Ev Idx": 3339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938196904.682, "dur": 64553.410, + "args": { + "External id": 980749,"Sequence number": 10552273, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3340 + } + }, + { + "ph": "f", "id": 193, "pid": 2338706, "tid": 2379422, "ts": 6345938196904.682, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 2338706, "tid": 2379422, + "ts": 6345938196935.336, "dur": 39.166, + "args": { + "External id": 980750,"Record function id": 0, "Ev Idx": 3341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 2338706, "tid": 2379422, + "ts": 6345938196982.315, "dur": 137.150, + "args": { + "External id": 980751,"Record function id": 0, "Ev Idx": 3342 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 2338706, "tid": 2379422, + "ts": 6345938197127.769, "dur": 64321.753, + "args": { + "External id": 980752,"Record function id": 0, "Ev Idx": 3343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938197223.668, "dur": 7.920, + "args": { + "External id": 980753,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938197242.556, "dur": 7.004, + "args": { + "External id": 980754,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938197265.284, "dur": 63268.255, + "args": { + "External id": 980755,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938197279.231, "dur": 63240.902, + "args": { + "External id": 980756,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938197408.401, "dur": 18.015, + "args": { + "External id": 980757,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938197446.177, "dur": 63026.915, + "args": { + "External id": 980758,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938197449.652, "dur": 63022.362, + "args": { + "External id": 980759,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938197454.555, "dur": 10.592, + "args": { + "External id": 980760,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938197467.656, "dur": 62999.445, + "args": { + "External id": 980761,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938260646.321, "dur": 12.680, + "args": { + "External id": 980762,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938260650.210, "dur": 8.391, + "args": { + "External id": 980763,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938260691.275, "dur": 436.053, + "args": { + "External id": 980764,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938260720.227, "dur": 400.935, + "args": { + "External id": 980765,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3356, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938260731.443, "dur": 383.638, + "args": { + "External id": 980766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938261149.228, "dur": 2.431, + "args": { + "External id": 980767,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3358, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938261220.468, "dur": 6.562, + "args": { + "External id": 980768,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938261276.699, "dur": 1.748, + "args": { + "External id": 980769,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938261296.056, "dur": 3.468, + "args": { + "External id": 980770,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938261311.818, "dur": 1.084, + "args": { + "External id": 980771,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938261323.696, "dur": 0.815, + "args": { + "External id": 980772,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938261336.673, "dur": 0.953, + "args": { + "External id": 980773,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938261349.370, "dur": 2.956, + "args": { + "External id": 980774,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938261371.935, "dur": 3.074, + "args": { + "External id": 980775,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938261386.844, "dur": 0.952, + "args": { + "External id": 980776,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938261481.988, "dur": 3014.748, + "args": { + "External id": 980777,"Record function id": 0, "Ev Idx": 3368 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 2338706, "tid": 2379422, + "ts": 6345938261503.099, "dur": 1116.974, + "args": { + "External id": 980778,"Record function id": 0, "Ev Idx": 3369 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338706, "tid": 2379422, + "ts": 6345938261520.586, "dur": 323.246, + "args": { + "External id": 980779,"Record function id": 0, "Ev Idx": 3370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938261611.079, "dur": 4.649, + "args": { + "External id": 980780,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938261619.015, "dur": 1.232, + "args": { + "External id": 980781,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938261622.357, "dur": 2.992, + "args": { + "External id": 980782,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938261626.882, "dur": 1.038, + "args": { + "External id": 980783,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938261629.186, "dur": 1.291, + "args": { + "External id": 980784,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938261632.109, "dur": 0.860, + "args": { + "External id": 980785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938261634.476, "dur": 2.647, + "args": { + "External id": 980786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938261638.683, "dur": 0.956, + "args": { + "External id": 980787,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938261641.070, "dur": 0.893, + "args": { + "External id": 980788,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938261643.522, "dur": 0.781, + "args": { + "External id": 980789,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938261661.618, "dur": 151.430, + "args": { + "External id": 980790,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938261677.748, "dur": 131.082, + "args": { + "External id": 980791,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938261694.832, "dur": 17.278, + "args": { + "External id": 980792,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938261716.304, "dur": 63.547, + "args": { + "External id": 980793,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938261719.145, "dur": 60.263, + "args": { + "External id": 980794,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938261722.837, "dur": 5.113, + "args": { + "External id": 980795,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938261729.869, "dur": 49.066, + "args": { + "External id": 980796,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 2338706, "tid": 2379422, + "ts": 6345938261936.613, "dur": 674.135, + "args": { + "External id": 980797,"Record function id": 0, "Ev Idx": 3388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338706, "tid": 2379422, + "ts": 6345938261952.539, "dur": 644.334, + "args": { + "External id": 980798,"Record function id": 0, "Ev Idx": 3389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938262035.725, "dur": 7.073, + "args": { + "External id": 980799,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938262097.144, "dur": 52.103, + "args": { + "External id": 980800,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262119.858, "dur": 2.266, + "args": { + "External id": 980801,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262124.967, "dur": 0.645, + "args": { + "External id": 980802,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262127.442, "dur": 0.608, + "args": { + "External id": 980803,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262129.715, "dur": 0.575, + "args": { + "External id": 980804,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262131.875, "dur": 0.479, + "args": { + "External id": 980805,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262134.131, "dur": 2.821, + "args": { + "External id": 980806,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262138.346, "dur": 0.442, + "args": { + "External id": 980807,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262140.493, "dur": 0.529, + "args": { + "External id": 980808,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262142.484, "dur": 0.462, + "args": { + "External id": 980809,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938262161.352, "dur": 53.185, + "args": { + "External id": 980810,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345938262249.979, "dur": 119.161, + "args": { + "External id": 980811,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938262261.595, "dur": 4.725, + "args": { + "External id": 980812,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345938262272.145, "dur": 11.345, + "args": { + "External id": 980813,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345938262276.752, "dur": 6.170, + "args": { + "External id": 980814,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262281.076, "dur": 0.588, + "args": { + "External id": 980815,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938262290.759, "dur": 26.554, + "args": { + "External id": 980816,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262292.819, "dur": 0.596, + "args": { + "External id": 980817,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262295.244, "dur": 0.400, + "args": { + "External id": 980818,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262297.139, "dur": 2.116, + "args": { + "External id": 980819,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262301.102, "dur": 0.376, + "args": { + "External id": 980820,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262302.992, "dur": 0.556, + "args": { + "External id": 980821,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262304.951, "dur": 0.576, + "args": { + "External id": 980822,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262307.146, "dur": 0.485, + "args": { + "External id": 980823,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262309.269, "dur": 0.495, + "args": { + "External id": 980824,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938262311.259, "dur": 0.403, + "args": { + "External id": 980825,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938262330.260, "dur": 30.669, + "args": { + "External id": 980826,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938262419.801, "dur": 111.604, + "args": { + "External id": 980827,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938262442.974, "dur": 84.686, + "args": { + "External id": 980828,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3419, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938262453.275, "dur": 69.946, + "args": { + "External id": 980829,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938262544.684, "dur": 1.922, + "args": { + "External id": 980830,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3421, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938262627.573, "dur": 1846.162, + "args": { + "External id": 980831,"Sequence number": 10552272, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3422 + } + }, + { + "ph": "f", "id": 194, "pid": 2338706, "tid": 2379422, "ts": 6345938262627.573, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938262741.616, "dur": 103.410, + "args": { + "External id": 980832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938262882.764, "dur": 41.384, + "args": { + "External id": 980833,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345938262942.978, "dur": 49.096, + "args": { + "External id": 980834,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938263004.675, "dur": 93.767, + "args": { + "External id": 980835,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938263111.087, "dur": 39.086, + "args": { + "External id": 980836,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938263159.066, "dur": 30.236, + "args": { + "External id": 980837,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938263197.816, "dur": 31.332, + "args": { + "External id": 980838,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938263256.833, "dur": 28.244, + "args": { + "External id": 980839,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938263304.260, "dur": 28.585, + "args": { + "External id": 980840,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938263353.748, "dur": 21.909, + "args": { + "External id": 980841,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938263388.714, "dur": 15.971, + "args": { + "External id": 980842,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938263414.256, "dur": 37.000, + "args": { + "External id": 980843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938263455.549, "dur": 34.954, + "args": { + "External id": 980844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345938263521.619, "dur": 257.410, + "args": { + "External id": 980845,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938263607.085, "dur": 6.051, + "args": { + "External id": 980846,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938263615.300, "dur": 2.755, + "args": { + "External id": 980847,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938263619.281, "dur": 1.695, + "args": { + "External id": 980848,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938263622.197, "dur": 2.030, + "args": { + "External id": 980849,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938263671.295, "dur": 5.189, + "args": { + "External id": 980850,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938263673.419, "dur": 2.885, + "args": { + "External id": 980851,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938263679.034, "dur": 35.528, + "args": { + "External id": 980852,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938263684.590, "dur": 3.966, + "args": { + "External id": 980853,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938263716.287, "dur": 1.762, + "args": { + "External id": 980854,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938263717.213, "dur": 0.766, + "args": { + "External id": 980855,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938263719.421, "dur": 14.754, + "args": { + "External id": 980856,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938263720.984, "dur": 0.637, + "args": { + "External id": 980857,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938263816.163, "dur": 29.246, + "args": { + "External id": 980858,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938263862.750, "dur": 16.549, + "args": { + "External id": 980859,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938263887.822, "dur": 41.311, + "args": { + "External id": 980860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938263936.536, "dur": 40.055, + "args": { + "External id": 980861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938263988.036, "dur": 42.294, + "args": { + "External id": 980862,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938264040.604, "dur": 73.596, + "args": { + "External id": 980863,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938264126.702, "dur": 32.022, + "args": { + "External id": 980864,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938264185.183, "dur": 45.003, + "args": { + "External id": 980865,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345938264253.578, "dur": 33.342, + "args": { + "External id": 980866,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938264304.459, "dur": 26.574, + "args": { + "External id": 980867,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938264350.916, "dur": 20.153, + "args": { + "External id": 980868,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938264387.807, "dur": 19.285, + "args": { + "External id": 980869,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345938264419.704, "dur": 18.955, + "args": { + "External id": 980870,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264519.762, "dur": 16.410, + "args": { + "External id": 980871,"Record function id": 0, "Ev Idx": 3462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264523.345, "dur": 11.775, + "args": { + "External id": 980872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264527.819, "dur": 6.133, + "args": { + "External id": 980873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264529.579, "dur": 4.241, + "args": { + "External id": 980874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264540.292, "dur": 5.738, + "args": { + "External id": 980875,"Record function id": 0, "Ev Idx": 3466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264541.905, "dur": 3.669, + "args": { + "External id": 980876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264542.844, "dur": 2.230, + "args": { + "External id": 980877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264543.883, "dur": 1.023, + "args": { + "External id": 980878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264549.266, "dur": 7.028, + "args": { + "External id": 980879,"Record function id": 0, "Ev Idx": 3470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264550.636, "dur": 5.220, + "args": { + "External id": 980880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264551.143, "dur": 4.076, + "args": { + "External id": 980881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264551.559, "dur": 3.592, + "args": { + "External id": 980882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264559.585, "dur": 4.258, + "args": { + "External id": 980883,"Record function id": 0, "Ev Idx": 3474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264560.887, "dur": 2.520, + "args": { + "External id": 980884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264561.484, "dur": 1.482, + "args": { + "External id": 980885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264562.058, "dur": 0.847, + "args": { + "External id": 980886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264567.009, "dur": 4.783, + "args": { + "External id": 980887,"Record function id": 0, "Ev Idx": 3478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264568.412, "dur": 2.941, + "args": { + "External id": 980888,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264569.340, "dur": 1.552, + "args": { + "External id": 980889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264570.085, "dur": 0.741, + "args": { + "External id": 980890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264575.066, "dur": 3.946, + "args": { + "External id": 980891,"Record function id": 0, "Ev Idx": 3482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264576.409, "dur": 2.190, + "args": { + "External id": 980892,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264576.883, "dur": 1.293, + "args": { + "External id": 980893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264577.459, "dur": 0.597, + "args": { + "External id": 980894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264582.183, "dur": 4.932, + "args": { + "External id": 980895,"Record function id": 0, "Ev Idx": 3486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264583.389, "dur": 3.281, + "args": { + "External id": 980896,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264583.983, "dur": 1.303, + "args": { + "External id": 980897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264584.389, "dur": 0.809, + "args": { + "External id": 980898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264590.365, "dur": 3.775, + "args": { + "External id": 980899,"Record function id": 0, "Ev Idx": 3490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264591.693, "dur": 2.030, + "args": { + "External id": 980900,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264592.425, "dur": 0.881, + "args": { + "External id": 980901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264592.700, "dur": 0.542, + "args": { + "External id": 980902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264597.223, "dur": 4.156, + "args": { + "External id": 980903,"Record function id": 0, "Ev Idx": 3494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938264598.483, "dur": 2.473, + "args": { + "External id": 980904,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264598.919, "dur": 1.644, + "args": { + "External id": 980905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938264599.319, "dur": 1.148, + "args": { + "External id": 980906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938264605.477, "dur": 65550.528, + "args": { + "External id": 980907,"Record function id": 0, "Sequence number": 10552271, "Fwd thread id": 1, "Ev Idx": 3498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938264606.600, "dur": 65537.034, + "args": { + "External id": 980908,"Sequence number": 10552271, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3499 + } + }, + { + "ph": "f", "id": 195, "pid": 2338706, "tid": 2379422, "ts": 6345938264606.600, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 2338706, "tid": 2379422, + "ts": 6345938264637.144, "dur": 40.143, + "args": { + "External id": 980909,"Record function id": 0, "Ev Idx": 3500 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 2338706, "tid": 2379422, + "ts": 6345938264684.977, "dur": 66.212, + "args": { + "External id": 980910,"Record function id": 0, "Ev Idx": 3501 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 2338706, "tid": 2379422, + "ts": 6345938264757.094, "dur": 65377.263, + "args": { + "External id": 980911,"Record function id": 0, "Ev Idx": 3502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938264845.967, "dur": 6.768, + "args": { + "External id": 980912,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938264862.449, "dur": 6.064, + "args": { + "External id": 980913,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938264884.236, "dur": 64297.846, + "args": { + "External id": 980914,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938264898.096, "dur": 64270.230, + "args": { + "External id": 980915,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938265042.158, "dur": 59.154, + "args": { + "External id": 980916,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938265124.521, "dur": 63993.321, + "args": { + "External id": 980917,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938265128.001, "dur": 63988.677, + "args": { + "External id": 980918,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938265133.501, "dur": 13.037, + "args": { + "External id": 980919,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938265148.911, "dur": 63962.564, + "args": { + "External id": 980920,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938329300.643, "dur": 12.955, + "args": { + "External id": 980921,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938329304.954, "dur": 8.209, + "args": { + "External id": 980922,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938329346.512, "dur": 418.948, + "args": { + "External id": 980923,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938329376.590, "dur": 383.421, + "args": { + "External id": 980924,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3515, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938329388.256, "dur": 366.606, + "args": { + "External id": 980925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938329785.228, "dur": 2.309, + "args": { + "External id": 980926,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3517, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938329851.459, "dur": 6.466, + "args": { + "External id": 980927,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938329907.283, "dur": 1.472, + "args": { + "External id": 980928,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938329926.110, "dur": 3.818, + "args": { + "External id": 980929,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938329943.553, "dur": 1.262, + "args": { + "External id": 980930,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938329956.489, "dur": 0.900, + "args": { + "External id": 980931,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938329969.219, "dur": 1.085, + "args": { + "External id": 980932,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938329981.875, "dur": 3.240, + "args": { + "External id": 980933,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938329996.433, "dur": 2.236, + "args": { + "External id": 980934,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330029.200, "dur": 1.578, + "args": { + "External id": 980935,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938330173.918, "dur": 3224.664, + "args": { + "External id": 980936,"Record function id": 0, "Ev Idx": 3527 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 2338706, "tid": 2379422, + "ts": 6345938330196.071, "dur": 1199.105, + "args": { + "External id": 980937,"Record function id": 0, "Ev Idx": 3528 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338706, "tid": 2379422, + "ts": 6345938330213.040, "dur": 361.649, + "args": { + "External id": 980938,"Record function id": 0, "Ev Idx": 3529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938330313.164, "dur": 5.088, + "args": { + "External id": 980939,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938330321.798, "dur": 1.189, + "args": { + "External id": 980940,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938330325.025, "dur": 3.298, + "args": { + "External id": 980941,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938330329.793, "dur": 1.348, + "args": { + "External id": 980942,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938330332.507, "dur": 0.925, + "args": { + "External id": 980943,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938330335.087, "dur": 1.062, + "args": { + "External id": 980944,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938330337.891, "dur": 2.781, + "args": { + "External id": 980945,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938330342.230, "dur": 1.154, + "args": { + "External id": 980946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938330344.932, "dur": 1.081, + "args": { + "External id": 980947,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938330347.492, "dur": 0.884, + "args": { + "External id": 980948,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938330367.346, "dur": 174.011, + "args": { + "External id": 980949,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938330385.393, "dur": 150.920, + "args": { + "External id": 980950,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938330413.983, "dur": 18.700, + "args": { + "External id": 980951,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938330436.873, "dur": 67.852, + "args": { + "External id": 980952,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938330440.115, "dur": 64.225, + "args": { + "External id": 980953,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330444.935, "dur": 5.212, + "args": { + "External id": 980954,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938330451.990, "dur": 51.687, + "args": { + "External id": 980955,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 2338706, "tid": 2379422, + "ts": 6345938330671.719, "dur": 713.964, + "args": { + "External id": 980956,"Record function id": 0, "Ev Idx": 3547 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338706, "tid": 2379422, + "ts": 6345938330690.077, "dur": 681.501, + "args": { + "External id": 980957,"Record function id": 0, "Ev Idx": 3548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938330753.014, "dur": 5.305, + "args": { + "External id": 980958,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938330773.722, "dur": 64.898, + "args": { + "External id": 980959,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330779.309, "dur": 1.782, + "args": { + "External id": 980960,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330783.568, "dur": 0.436, + "args": { + "External id": 980961,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330785.998, "dur": 0.388, + "args": { + "External id": 980962,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330788.067, "dur": 0.357, + "args": { + "External id": 980963,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330790.101, "dur": 0.585, + "args": { + "External id": 980964,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330792.256, "dur": 3.282, + "args": { + "External id": 980965,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330797.227, "dur": 0.700, + "args": { + "External id": 980966,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330829.875, "dur": 0.888, + "args": { + "External id": 980967,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330832.732, "dur": 0.419, + "args": { + "External id": 980968,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938330857.098, "dur": 44.913, + "args": { + "External id": 980969,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345938330935.917, "dur": 182.788, + "args": { + "External id": 980970,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938330949.255, "dur": 3.613, + "args": { + "External id": 980971,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345938330958.775, "dur": 11.377, + "args": { + "External id": 980972,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345938330963.599, "dur": 6.114, + "args": { + "External id": 980973,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330967.876, "dur": 0.623, + "args": { + "External id": 980974,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938330977.386, "dur": 28.232, + "args": { + "External id": 980975,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330979.817, "dur": 0.492, + "args": { + "External id": 980976,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330982.222, "dur": 0.313, + "args": { + "External id": 980977,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330984.389, "dur": 2.460, + "args": { + "External id": 980978,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330988.635, "dur": 0.587, + "args": { + "External id": 980979,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330990.646, "dur": 0.606, + "args": { + "External id": 980980,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330992.950, "dur": 0.705, + "args": { + "External id": 980981,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330995.375, "dur": 0.557, + "args": { + "External id": 980982,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938330997.804, "dur": 0.427, + "args": { + "External id": 980983,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938331000.149, "dur": 0.751, + "args": { + "External id": 980984,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938331037.416, "dur": 70.182, + "args": { + "External id": 980985,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938331177.879, "dur": 120.827, + "args": { + "External id": 980986,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938331203.832, "dur": 90.504, + "args": { + "External id": 980987,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3578, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938331214.161, "dur": 75.808, + "args": { + "External id": 980988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938331314.503, "dur": 1.993, + "args": { + "External id": 980989,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3580, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938331402.971, "dur": 1968.684, + "args": { + "External id": 980990,"Sequence number": 10552270, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3581 + } + }, + { + "ph": "f", "id": 196, "pid": 2338706, "tid": 2379422, "ts": 6345938331402.971, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938331522.163, "dur": 107.693, + "args": { + "External id": 980991,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938331669.770, "dur": 42.251, + "args": { + "External id": 980992,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345938331844.199, "dur": 58.891, + "args": { + "External id": 980993,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938331918.377, "dur": 32.705, + "args": { + "External id": 980994,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938331958.704, "dur": 33.970, + "args": { + "External id": 980995,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938332000.258, "dur": 50.918, + "args": { + "External id": 980996,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938332098.480, "dur": 36.324, + "args": { + "External id": 980997,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938332167.839, "dur": 28.631, + "args": { + "External id": 980998,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938332217.113, "dur": 29.147, + "args": { + "External id": 980999,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938332267.719, "dur": 20.184, + "args": { + "External id": 981000,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938332301.199, "dur": 14.741, + "args": { + "External id": 981001,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938332324.956, "dur": 36.460, + "args": { + "External id": 981002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938332365.419, "dur": 33.121, + "args": { + "External id": 981003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345938332432.353, "dur": 261.927, + "args": { + "External id": 981004,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938332519.760, "dur": 6.358, + "args": { + "External id": 981005,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938332528.253, "dur": 2.316, + "args": { + "External id": 981006,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938332531.900, "dur": 1.960, + "args": { + "External id": 981007,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938332534.908, "dur": 2.158, + "args": { + "External id": 981008,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938332583.374, "dur": 5.589, + "args": { + "External id": 981009,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938332585.754, "dur": 3.041, + "args": { + "External id": 981010,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938332591.300, "dur": 36.363, + "args": { + "External id": 981011,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938332596.594, "dur": 4.609, + "args": { + "External id": 981012,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938332629.495, "dur": 1.552, + "args": { + "External id": 981013,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938332630.352, "dur": 0.604, + "args": { + "External id": 981014,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938332632.925, "dur": 15.154, + "args": { + "External id": 981015,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938332634.974, "dur": 0.567, + "args": { + "External id": 981016,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938332731.458, "dur": 27.023, + "args": { + "External id": 981017,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938332774.952, "dur": 16.269, + "args": { + "External id": 981018,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938332799.647, "dur": 50.221, + "args": { + "External id": 981019,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938332862.803, "dur": 45.409, + "args": { + "External id": 981020,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938332921.412, "dur": 21.404, + "args": { + "External id": 981021,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938332949.381, "dur": 33.969, + "args": { + "External id": 981022,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938332991.976, "dur": 54.139, + "args": { + "External id": 981023,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938333105.313, "dur": 36.101, + "args": { + "External id": 981024,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345938333165.521, "dur": 26.420, + "args": { + "External id": 981025,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938333209.037, "dur": 28.772, + "args": { + "External id": 981026,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938333252.536, "dur": 17.064, + "args": { + "External id": 981027,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938333287.960, "dur": 14.499, + "args": { + "External id": 981028,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345938333316.229, "dur": 18.783, + "args": { + "External id": 981029,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333421.646, "dur": 20.407, + "args": { + "External id": 981030,"Record function id": 0, "Ev Idx": 3621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333425.257, "dur": 15.829, + "args": { + "External id": 981031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333433.564, "dur": 6.389, + "args": { + "External id": 981032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333435.455, "dur": 4.374, + "args": { + "External id": 981033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333446.014, "dur": 5.793, + "args": { + "External id": 981034,"Record function id": 0, "Ev Idx": 3625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333447.565, "dur": 3.756, + "args": { + "External id": 981035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333448.451, "dur": 2.410, + "args": { + "External id": 981036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333449.608, "dur": 1.139, + "args": { + "External id": 981037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333455.101, "dur": 6.980, + "args": { + "External id": 981038,"Record function id": 0, "Ev Idx": 3629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333456.618, "dur": 5.045, + "args": { + "External id": 981039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333457.211, "dur": 3.988, + "args": { + "External id": 981040,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333457.710, "dur": 3.418, + "args": { + "External id": 981041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333465.307, "dur": 8.812, + "args": { + "External id": 981042,"Record function id": 0, "Ev Idx": 3633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333466.801, "dur": 6.912, + "args": { + "External id": 981043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333472.103, "dur": 1.148, + "args": { + "External id": 981044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333472.454, "dur": 0.727, + "args": { + "External id": 981045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333477.499, "dur": 4.284, + "args": { + "External id": 981046,"Record function id": 0, "Ev Idx": 3637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333478.955, "dur": 2.417, + "args": { + "External id": 981047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333479.535, "dur": 1.360, + "args": { + "External id": 981048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333480.061, "dur": 0.762, + "args": { + "External id": 981049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333485.000, "dur": 4.287, + "args": { + "External id": 981050,"Record function id": 0, "Ev Idx": 3641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333486.044, "dur": 2.812, + "args": { + "External id": 981051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333486.817, "dur": 1.589, + "args": { + "External id": 981052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333487.443, "dur": 0.823, + "args": { + "External id": 981053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333492.506, "dur": 4.057, + "args": { + "External id": 981054,"Record function id": 0, "Ev Idx": 3645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333493.869, "dur": 2.299, + "args": { + "External id": 981055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333494.490, "dur": 1.205, + "args": { + "External id": 981056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333494.912, "dur": 0.696, + "args": { + "External id": 981057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333499.716, "dur": 3.387, + "args": { + "External id": 981058,"Record function id": 0, "Ev Idx": 3649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333500.732, "dur": 1.978, + "args": { + "External id": 981059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333501.359, "dur": 0.953, + "args": { + "External id": 981060,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333501.621, "dur": 0.617, + "args": { + "External id": 981061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333506.308, "dur": 3.824, + "args": { + "External id": 981062,"Record function id": 0, "Ev Idx": 3653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938333507.483, "dur": 2.227, + "args": { + "External id": 981063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333507.943, "dur": 1.321, + "args": { + "External id": 981064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938333508.618, "dur": 0.552, + "args": { + "External id": 981065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938333514.313, "dur": 65271.583, + "args": { + "External id": 981066,"Record function id": 0, "Sequence number": 10552269, "Fwd thread id": 1, "Ev Idx": 3657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938333515.792, "dur": 65261.233, + "args": { + "External id": 981067,"Sequence number": 10552269, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3658 + } + }, + { + "ph": "f", "id": 197, "pid": 2338706, "tid": 2379422, "ts": 6345938333515.792, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 2338706, "tid": 2379422, + "ts": 6345938333547.968, "dur": 39.167, + "args": { + "External id": 981068,"Record function id": 0, "Ev Idx": 3659 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 2338706, "tid": 2379422, + "ts": 6345938333594.552, "dur": 70.445, + "args": { + "External id": 981069,"Record function id": 0, "Ev Idx": 3660 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 2338706, "tid": 2379422, + "ts": 6345938333670.660, "dur": 65098.582, + "args": { + "External id": 981070,"Record function id": 0, "Ev Idx": 3661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938333764.600, "dur": 6.797, + "args": { + "External id": 981071,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938333781.846, "dur": 6.188, + "args": { + "External id": 981072,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938333802.943, "dur": 64060.393, + "args": { + "External id": 981073,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938333817.529, "dur": 64031.698, + "args": { + "External id": 981074,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938333928.863, "dur": 19.954, + "args": { + "External id": 981075,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938333969.137, "dur": 63829.419, + "args": { + "External id": 981076,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938333972.821, "dur": 63824.665, + "args": { + "External id": 981077,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938333977.795, "dur": 9.303, + "args": { + "External id": 981078,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938333989.528, "dur": 63802.657, + "args": { + "External id": 981079,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938397981.532, "dur": 13.373, + "args": { + "External id": 981080,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938397985.907, "dur": 8.695, + "args": { + "External id": 981081,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938398041.565, "dur": 412.326, + "args": { + "External id": 981082,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938398099.973, "dur": 348.442, + "args": { + "External id": 981083,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3674, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938398113.539, "dur": 328.470, + "args": { + "External id": 981084,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938398474.752, "dur": 2.263, + "args": { + "External id": 981085,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3676, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938398543.996, "dur": 7.363, + "args": { + "External id": 981086,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938398600.102, "dur": 1.716, + "args": { + "External id": 981087,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938398619.907, "dur": 3.454, + "args": { + "External id": 981088,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938398636.695, "dur": 1.194, + "args": { + "External id": 981089,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938398650.229, "dur": 1.052, + "args": { + "External id": 981090,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938398663.503, "dur": 1.235, + "args": { + "External id": 981091,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938398677.039, "dur": 3.441, + "args": { + "External id": 981092,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938398691.925, "dur": 2.636, + "args": { + "External id": 981093,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938398705.489, "dur": 1.129, + "args": { + "External id": 981094,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938398801.121, "dur": 3095.504, + "args": { + "External id": 981095,"Record function id": 0, "Ev Idx": 3686 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 2338706, "tid": 2379422, + "ts": 6345938398821.080, "dur": 1222.697, + "args": { + "External id": 981096,"Record function id": 0, "Ev Idx": 3687 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338706, "tid": 2379422, + "ts": 6345938398836.207, "dur": 456.160, + "args": { + "External id": 981097,"Record function id": 0, "Ev Idx": 3688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938398928.416, "dur": 4.346, + "args": { + "External id": 981098,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938398936.101, "dur": 0.888, + "args": { + "External id": 981099,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938398938.981, "dur": 3.180, + "args": { + "External id": 981100,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938398943.779, "dur": 1.097, + "args": { + "External id": 981101,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938398946.363, "dur": 1.018, + "args": { + "External id": 981102,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938398949.183, "dur": 0.852, + "args": { + "External id": 981103,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938398951.647, "dur": 2.694, + "args": { + "External id": 981104,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938398955.835, "dur": 0.849, + "args": { + "External id": 981105,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938398958.603, "dur": 0.922, + "args": { + "External id": 981106,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938398961.627, "dur": 1.034, + "args": { + "External id": 981107,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938398980.890, "dur": 270.192, + "args": { + "External id": 981108,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938399098.271, "dur": 146.947, + "args": { + "External id": 981109,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938399118.245, "dur": 19.201, + "args": { + "External id": 981110,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938399141.791, "dur": 70.852, + "args": { + "External id": 981111,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938399144.906, "dur": 67.381, + "args": { + "External id": 981112,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399149.323, "dur": 6.290, + "args": { + "External id": 981113,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938399157.830, "dur": 53.924, + "args": { + "External id": 981114,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 2338706, "tid": 2379422, + "ts": 6345938399396.674, "dur": 636.803, + "args": { + "External id": 981115,"Record function id": 0, "Ev Idx": 3706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338706, "tid": 2379422, + "ts": 6345938399415.408, "dur": 581.551, + "args": { + "External id": 981116,"Record function id": 0, "Ev Idx": 3707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938399489.036, "dur": 5.072, + "args": { + "External id": 981117,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938399509.723, "dur": 33.939, + "args": { + "External id": 981118,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399515.428, "dur": 2.031, + "args": { + "External id": 981119,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399520.045, "dur": 0.454, + "args": { + "External id": 981120,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399522.765, "dur": 0.493, + "args": { + "External id": 981121,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399525.057, "dur": 0.642, + "args": { + "External id": 981122,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399527.616, "dur": 0.485, + "args": { + "External id": 981123,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399530.009, "dur": 2.140, + "args": { + "External id": 981124,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399533.865, "dur": 0.474, + "args": { + "External id": 981125,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399536.026, "dur": 0.372, + "args": { + "External id": 981126,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399538.217, "dur": 0.442, + "args": { + "External id": 981127,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938399556.775, "dur": 42.666, + "args": { + "External id": 981128,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345938399647.575, "dur": 117.671, + "args": { + "External id": 981129,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938399659.653, "dur": 3.491, + "args": { + "External id": 981130,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345938399669.247, "dur": 11.367, + "args": { + "External id": 981131,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345938399674.005, "dur": 6.210, + "args": { + "External id": 981132,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399678.045, "dur": 0.779, + "args": { + "External id": 981133,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938399687.893, "dur": 27.237, + "args": { + "External id": 981134,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399690.306, "dur": 0.535, + "args": { + "External id": 981135,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399692.782, "dur": 0.500, + "args": { + "External id": 981136,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399695.244, "dur": 2.497, + "args": { + "External id": 981137,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399699.336, "dur": 0.401, + "args": { + "External id": 981138,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399701.927, "dur": 0.354, + "args": { + "External id": 981139,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399703.734, "dur": 0.447, + "args": { + "External id": 981140,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399706.189, "dur": 0.443, + "args": { + "External id": 981141,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399708.121, "dur": 0.436, + "args": { + "External id": 981142,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938399710.336, "dur": 0.358, + "args": { + "External id": 981143,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938399726.301, "dur": 31.320, + "args": { + "External id": 981144,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938399813.932, "dur": 114.776, + "args": { + "External id": 981145,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938399836.238, "dur": 88.759, + "args": { + "External id": 981146,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3737, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938399846.250, "dur": 74.131, + "args": { + "External id": 981147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938399944.247, "dur": 1.918, + "args": { + "External id": 981148,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3739, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938400090.266, "dur": 1782.098, + "args": { + "External id": 981149,"Sequence number": 10552268, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3740 + } + }, + { + "ph": "f", "id": 198, "pid": 2338706, "tid": 2379422, "ts": 6345938400090.266, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938400213.350, "dur": 115.537, + "args": { + "External id": 981150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938400371.417, "dur": 43.304, + "args": { + "External id": 981151,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345938400433.823, "dur": 52.136, + "args": { + "External id": 981152,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938400498.887, "dur": 33.638, + "args": { + "External id": 981153,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938400539.854, "dur": 33.440, + "args": { + "External id": 981154,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938400580.833, "dur": 28.537, + "args": { + "External id": 981155,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938400617.243, "dur": 28.953, + "args": { + "External id": 981156,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938400670.839, "dur": 23.514, + "args": { + "External id": 981157,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938400712.369, "dur": 28.779, + "args": { + "External id": 981158,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938400761.243, "dur": 19.602, + "args": { + "External id": 981159,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938400794.461, "dur": 15.074, + "args": { + "External id": 981160,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938400818.773, "dur": 35.840, + "args": { + "External id": 981161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938400858.577, "dur": 32.918, + "args": { + "External id": 981162,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345938400933.499, "dur": 323.463, + "args": { + "External id": 981163,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938401033.680, "dur": 7.319, + "args": { + "External id": 981164,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938401043.466, "dur": 2.762, + "args": { + "External id": 981165,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938401047.421, "dur": 1.928, + "args": { + "External id": 981166,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938401050.817, "dur": 35.932, + "args": { + "External id": 981167,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938401138.744, "dur": 5.150, + "args": { + "External id": 981168,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938401140.919, "dur": 2.806, + "args": { + "External id": 981169,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938401146.370, "dur": 37.127, + "args": { + "External id": 981170,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938401152.500, "dur": 4.228, + "args": { + "External id": 981171,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938401185.124, "dur": 1.871, + "args": { + "External id": 981172,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938401186.232, "dur": 0.678, + "args": { + "External id": 981173,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938401188.410, "dur": 16.545, + "args": { + "External id": 981174,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938401190.062, "dur": 0.569, + "args": { + "External id": 981175,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938401297.350, "dur": 28.171, + "args": { + "External id": 981176,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938401341.990, "dur": 15.961, + "args": { + "External id": 981177,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938401366.475, "dur": 49.884, + "args": { + "External id": 981178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938401423.762, "dur": 53.184, + "args": { + "External id": 981179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938401495.097, "dur": 28.411, + "args": { + "External id": 981180,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938401531.062, "dur": 38.217, + "args": { + "External id": 981181,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938401576.977, "dur": 34.045, + "args": { + "External id": 981182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938401618.631, "dur": 31.923, + "args": { + "External id": 981183,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345938401674.166, "dur": 25.872, + "args": { + "External id": 981184,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938401715.292, "dur": 26.603, + "args": { + "External id": 981185,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938401756.935, "dur": 16.990, + "args": { + "External id": 981186,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938401790.104, "dur": 18.214, + "args": { + "External id": 981187,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345938401821.295, "dur": 16.852, + "args": { + "External id": 981188,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401919.862, "dur": 16.459, + "args": { + "External id": 981189,"Record function id": 0, "Ev Idx": 3780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401923.628, "dur": 11.723, + "args": { + "External id": 981190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401928.326, "dur": 6.107, + "args": { + "External id": 981191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401930.214, "dur": 4.094, + "args": { + "External id": 981192,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401940.584, "dur": 5.518, + "args": { + "External id": 981193,"Record function id": 0, "Ev Idx": 3784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401942.039, "dur": 3.636, + "args": { + "External id": 981194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401943.095, "dur": 2.017, + "args": { + "External id": 981195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401943.991, "dur": 1.009, + "args": { + "External id": 981196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401949.608, "dur": 6.734, + "args": { + "External id": 981197,"Record function id": 0, "Ev Idx": 3788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401951.131, "dur": 4.794, + "args": { + "External id": 981198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401951.784, "dur": 3.555, + "args": { + "External id": 981199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401952.279, "dur": 2.989, + "args": { + "External id": 981200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401959.497, "dur": 3.901, + "args": { + "External id": 981201,"Record function id": 0, "Ev Idx": 3792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401960.714, "dur": 2.288, + "args": { + "External id": 981202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401961.311, "dur": 1.105, + "args": { + "External id": 981203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401961.709, "dur": 0.642, + "args": { + "External id": 981204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401966.544, "dur": 4.689, + "args": { + "External id": 981205,"Record function id": 0, "Ev Idx": 3796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401967.978, "dur": 2.855, + "args": { + "External id": 981206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401969.153, "dur": 1.245, + "args": { + "External id": 981207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401969.698, "dur": 0.635, + "args": { + "External id": 981208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401974.750, "dur": 4.582, + "args": { + "External id": 981209,"Record function id": 0, "Ev Idx": 3800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401976.439, "dur": 2.483, + "args": { + "External id": 981210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401977.002, "dur": 1.425, + "args": { + "External id": 981211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401977.555, "dur": 0.751, + "args": { + "External id": 981212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401982.755, "dur": 3.661, + "args": { + "External id": 981213,"Record function id": 0, "Ev Idx": 3804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401983.886, "dur": 2.101, + "args": { + "External id": 981214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401984.385, "dur": 1.167, + "args": { + "External id": 981215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401984.826, "dur": 0.627, + "args": { + "External id": 981216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401989.547, "dur": 3.717, + "args": { + "External id": 981217,"Record function id": 0, "Ev Idx": 3808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401990.851, "dur": 2.024, + "args": { + "External id": 981218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401991.347, "dur": 0.992, + "args": { + "External id": 981219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401991.604, "dur": 0.669, + "args": { + "External id": 981220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401996.441, "dur": 4.196, + "args": { + "External id": 981221,"Record function id": 0, "Ev Idx": 3812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938401997.720, "dur": 2.508, + "args": { + "External id": 981222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401998.172, "dur": 1.293, + "args": { + "External id": 981223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938401998.775, "dur": 0.593, + "args": { + "External id": 981224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938402004.691, "dur": 67932.164, + "args": { + "External id": 981225,"Record function id": 0, "Sequence number": 10552267, "Fwd thread id": 1, "Ev Idx": 3816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938402005.989, "dur": 67921.424, + "args": { + "External id": 981226,"Sequence number": 10552267, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3817 + } + }, + { + "ph": "f", "id": 199, "pid": 2338706, "tid": 2379422, "ts": 6345938402005.989, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 2338706, "tid": 2379422, + "ts": 6345938402090.010, "dur": 39.761, + "args": { + "External id": 981227,"Record function id": 0, "Ev Idx": 3818 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 2338706, "tid": 2379422, + "ts": 6345938402137.841, "dur": 69.601, + "args": { + "External id": 981228,"Record function id": 0, "Ev Idx": 3819 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 2338706, "tid": 2379422, + "ts": 6345938402213.152, "dur": 67705.997, + "args": { + "External id": 981229,"Record function id": 0, "Ev Idx": 3820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938402307.440, "dur": 8.171, + "args": { + "External id": 981230,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938402326.771, "dur": 7.445, + "args": { + "External id": 981231,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938402349.921, "dur": 66629.603, + "args": { + "External id": 981232,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938402364.295, "dur": 66600.731, + "args": { + "External id": 981233,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938402466.804, "dur": 19.251, + "args": { + "External id": 981234,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938402505.585, "dur": 66411.082, + "args": { + "External id": 981235,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938402509.167, "dur": 66406.263, + "args": { + "External id": 981236,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938402514.156, "dur": 11.057, + "args": { + "External id": 981237,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938402527.755, "dur": 66382.259, + "args": { + "External id": 981238,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938469136.310, "dur": 13.739, + "args": { + "External id": 981239,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938469140.826, "dur": 8.626, + "args": { + "External id": 981240,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938469182.826, "dur": 427.947, + "args": { + "External id": 981241,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938469211.628, "dur": 393.564, + "args": { + "External id": 981242,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3833, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938469224.222, "dur": 375.148, + "args": { + "External id": 981243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938469630.100, "dur": 2.441, + "args": { + "External id": 981244,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3835, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938469695.840, "dur": 7.149, + "args": { + "External id": 981245,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938469752.019, "dur": 1.456, + "args": { + "External id": 981246,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938469771.373, "dur": 3.189, + "args": { + "External id": 981247,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938469787.674, "dur": 0.952, + "args": { + "External id": 981248,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938469801.100, "dur": 1.244, + "args": { + "External id": 981249,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938469813.634, "dur": 0.902, + "args": { + "External id": 981250,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938469826.193, "dur": 3.335, + "args": { + "External id": 981251,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938469840.899, "dur": 2.501, + "args": { + "External id": 981252,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938469854.626, "dur": 1.227, + "args": { + "External id": 981253,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938469953.010, "dur": 3046.609, + "args": { + "External id": 981254,"Record function id": 0, "Ev Idx": 3845 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 2338706, "tid": 2379422, + "ts": 6345938469974.329, "dur": 1222.065, + "args": { + "External id": 981255,"Record function id": 0, "Ev Idx": 3846 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338706, "tid": 2379422, + "ts": 6345938469991.026, "dur": 420.002, + "args": { + "External id": 981256,"Record function id": 0, "Ev Idx": 3847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938470145.420, "dur": 5.634, + "args": { + "External id": 981257,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938470154.505, "dur": 1.482, + "args": { + "External id": 981258,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938470158.055, "dur": 3.817, + "args": { + "External id": 981259,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938470163.567, "dur": 1.091, + "args": { + "External id": 981260,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938470166.405, "dur": 1.342, + "args": { + "External id": 981261,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938470169.555, "dur": 1.179, + "args": { + "External id": 981262,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938470172.489, "dur": 2.238, + "args": { + "External id": 981263,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938470176.130, "dur": 0.821, + "args": { + "External id": 981264,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938470178.680, "dur": 1.106, + "args": { + "External id": 981265,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938470181.270, "dur": 0.841, + "args": { + "External id": 981266,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938470201.662, "dur": 173.402, + "args": { + "External id": 981267,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938470219.165, "dur": 150.893, + "args": { + "External id": 981268,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938470245.904, "dur": 19.162, + "args": { + "External id": 981269,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938470269.525, "dur": 69.018, + "args": { + "External id": 981270,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938470272.735, "dur": 65.412, + "args": { + "External id": 981271,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470277.101, "dur": 5.330, + "args": { + "External id": 981272,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938470284.559, "dur": 52.861, + "args": { + "External id": 981273,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 2338706, "tid": 2379422, + "ts": 6345938470506.518, "dur": 680.532, + "args": { + "External id": 981274,"Record function id": 0, "Ev Idx": 3865 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338706, "tid": 2379422, + "ts": 6345938470525.848, "dur": 646.845, + "args": { + "External id": 981275,"Record function id": 0, "Ev Idx": 3866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938470593.866, "dur": 4.836, + "args": { + "External id": 981276,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938470614.692, "dur": 32.050, + "args": { + "External id": 981277,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470620.218, "dur": 1.838, + "args": { + "External id": 981278,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470624.544, "dur": 0.468, + "args": { + "External id": 981279,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470626.781, "dur": 0.580, + "args": { + "External id": 981280,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470629.028, "dur": 0.627, + "args": { + "External id": 981281,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470631.194, "dur": 0.488, + "args": { + "External id": 981282,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470633.315, "dur": 2.710, + "args": { + "External id": 981283,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470637.549, "dur": 0.444, + "args": { + "External id": 981284,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470639.695, "dur": 0.613, + "args": { + "External id": 981285,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470641.911, "dur": 0.484, + "args": { + "External id": 981286,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938470658.275, "dur": 46.677, + "args": { + "External id": 981287,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345938470737.197, "dur": 136.079, + "args": { + "External id": 981288,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938470747.351, "dur": 3.550, + "args": { + "External id": 981289,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345938470756.335, "dur": 10.738, + "args": { + "External id": 981290,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345938470760.951, "dur": 5.714, + "args": { + "External id": 981291,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470764.802, "dur": 0.614, + "args": { + "External id": 981292,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938470773.339, "dur": 47.398, + "args": { + "External id": 981293,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470775.763, "dur": 0.355, + "args": { + "External id": 981294,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470778.126, "dur": 0.377, + "args": { + "External id": 981295,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470799.526, "dur": 2.473, + "args": { + "External id": 981296,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470803.964, "dur": 0.466, + "args": { + "External id": 981297,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470806.094, "dur": 0.566, + "args": { + "External id": 981298,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470808.453, "dur": 0.478, + "args": { + "External id": 981299,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470810.596, "dur": 0.431, + "args": { + "External id": 981300,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470812.856, "dur": 0.366, + "args": { + "External id": 981301,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938470815.030, "dur": 0.672, + "args": { + "External id": 981302,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938470831.082, "dur": 34.197, + "args": { + "External id": 981303,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938470921.307, "dur": 171.248, + "args": { + "External id": 981304,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938470944.036, "dur": 143.447, + "args": { + "External id": 981305,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3896, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938470954.134, "dur": 95.684, + "args": { + "External id": 981306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938471110.349, "dur": 2.638, + "args": { + "External id": 981307,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3898, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938471205.354, "dur": 1770.845, + "args": { + "External id": 981308,"Sequence number": 10552266, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3899 + } + }, + { + "ph": "f", "id": 200, "pid": 2338706, "tid": 2379422, "ts": 6345938471205.354, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938471325.435, "dur": 116.240, + "args": { + "External id": 981309,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938471485.542, "dur": 44.530, + "args": { + "External id": 981310,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345938471548.447, "dur": 53.231, + "args": { + "External id": 981311,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938471613.900, "dur": 33.184, + "args": { + "External id": 981312,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938471654.221, "dur": 35.774, + "args": { + "External id": 981313,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938471697.898, "dur": 28.607, + "args": { + "External id": 981314,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938471733.898, "dur": 29.529, + "args": { + "External id": 981315,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938471787.571, "dur": 23.792, + "args": { + "External id": 981316,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938471830.285, "dur": 29.369, + "args": { + "External id": 981317,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938471881.729, "dur": 19.983, + "args": { + "External id": 981318,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938471914.967, "dur": 16.155, + "args": { + "External id": 981319,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938471940.018, "dur": 37.421, + "args": { + "External id": 981320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938471981.319, "dur": 51.744, + "args": { + "External id": 981321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345938472101.465, "dur": 260.376, + "args": { + "External id": 981322,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938472186.686, "dur": 6.924, + "args": { + "External id": 981323,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938472196.107, "dur": 2.656, + "args": { + "External id": 981324,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938472200.050, "dur": 1.763, + "args": { + "External id": 981325,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938472203.047, "dur": 1.831, + "args": { + "External id": 981326,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938472253.567, "dur": 5.209, + "args": { + "External id": 981327,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938472255.680, "dur": 2.948, + "args": { + "External id": 981328,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938472261.322, "dur": 34.907, + "args": { + "External id": 981329,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938472266.953, "dur": 3.924, + "args": { + "External id": 981330,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938472298.169, "dur": 2.013, + "args": { + "External id": 981331,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938472299.393, "dur": 0.691, + "args": { + "External id": 981332,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938472301.474, "dur": 14.738, + "args": { + "External id": 981333,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938472302.953, "dur": 0.681, + "args": { + "External id": 981334,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938472402.492, "dur": 29.575, + "args": { + "External id": 981335,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938472449.847, "dur": 16.877, + "args": { + "External id": 981336,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938472475.483, "dur": 47.301, + "args": { + "External id": 981337,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938472530.131, "dur": 55.831, + "args": { + "External id": 981338,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938472605.016, "dur": 27.493, + "args": { + "External id": 981339,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938472639.252, "dur": 33.442, + "args": { + "External id": 981340,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938472680.309, "dur": 31.827, + "args": { + "External id": 981341,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938472720.101, "dur": 30.816, + "args": { + "External id": 981342,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345938472774.930, "dur": 27.140, + "args": { + "External id": 981343,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938472817.249, "dur": 30.401, + "args": { + "External id": 981344,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938472862.073, "dur": 19.270, + "args": { + "External id": 981345,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938472897.327, "dur": 16.881, + "args": { + "External id": 981346,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345938472926.346, "dur": 18.208, + "args": { + "External id": 981347,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473043.189, "dur": 71.503, + "args": { + "External id": 981348,"Record function id": 0, "Ev Idx": 3939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473047.079, "dur": 65.619, + "args": { + "External id": 981349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473051.856, "dur": 58.962, + "args": { + "External id": 981350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473105.415, "dur": 5.051, + "args": { + "External id": 981351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473121.693, "dur": 6.627, + "args": { + "External id": 981352,"Record function id": 0, "Ev Idx": 3943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473123.606, "dur": 4.305, + "args": { + "External id": 981353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473125.175, "dur": 2.271, + "args": { + "External id": 981354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473126.216, "dur": 1.093, + "args": { + "External id": 981355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473131.680, "dur": 7.412, + "args": { + "External id": 981356,"Record function id": 0, "Ev Idx": 3947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473133.358, "dur": 5.324, + "args": { + "External id": 981357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473134.023, "dur": 4.087, + "args": { + "External id": 981358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473134.436, "dur": 3.606, + "args": { + "External id": 981359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473142.287, "dur": 5.614, + "args": { + "External id": 981360,"Record function id": 0, "Ev Idx": 3951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473143.885, "dur": 3.605, + "args": { + "External id": 981361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473144.763, "dur": 2.141, + "args": { + "External id": 981362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473145.845, "dur": 0.990, + "args": { + "External id": 981363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473150.919, "dur": 4.515, + "args": { + "External id": 981364,"Record function id": 0, "Ev Idx": 3955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473152.724, "dur": 2.293, + "args": { + "External id": 981365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473153.346, "dur": 1.146, + "args": { + "External id": 981366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473153.800, "dur": 0.616, + "args": { + "External id": 981367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473162.153, "dur": 4.393, + "args": { + "External id": 981368,"Record function id": 0, "Ev Idx": 3959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473163.470, "dur": 2.660, + "args": { + "External id": 981369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473164.163, "dur": 1.419, + "args": { + "External id": 981370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473164.819, "dur": 0.661, + "args": { + "External id": 981371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473169.896, "dur": 31.039, + "args": { + "External id": 981372,"Record function id": 0, "Ev Idx": 3963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473171.048, "dur": 29.408, + "args": { + "External id": 981373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473171.518, "dur": 28.305, + "args": { + "External id": 981374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473198.912, "dur": 0.810, + "args": { + "External id": 981375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473204.236, "dur": 4.623, + "args": { + "External id": 981376,"Record function id": 0, "Ev Idx": 3967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473205.682, "dur": 2.758, + "args": { + "External id": 981377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473206.444, "dur": 1.344, + "args": { + "External id": 981378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473206.979, "dur": 0.744, + "args": { + "External id": 981379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473211.946, "dur": 6.846, + "args": { + "External id": 981380,"Record function id": 0, "Ev Idx": 3971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938473213.356, "dur": 5.017, + "args": { + "External id": 981381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473213.923, "dur": 3.697, + "args": { + "External id": 981382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938473216.751, "dur": 0.762, + "args": { + "External id": 981383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938473223.246, "dur": 64663.747, + "args": { + "External id": 981384,"Record function id": 0, "Sequence number": 10552265, "Fwd thread id": 1, "Ev Idx": 3975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938473224.623, "dur": 64652.751, + "args": { + "External id": 981385,"Sequence number": 10552265, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3976 + } + }, + { + "ph": "f", "id": 201, "pid": 2338706, "tid": 2379422, "ts": 6345938473224.623, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 2338706, "tid": 2379422, + "ts": 6345938473263.770, "dur": 45.912, + "args": { + "External id": 981386,"Record function id": 0, "Ev Idx": 3977 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 2338706, "tid": 2379422, + "ts": 6345938473317.931, "dur": 72.315, + "args": { + "External id": 981387,"Record function id": 0, "Ev Idx": 3978 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 2338706, "tid": 2379422, + "ts": 6345938473396.183, "dur": 64472.093, + "args": { + "External id": 981388,"Record function id": 0, "Ev Idx": 3979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938473495.408, "dur": 7.857, + "args": { + "External id": 981389,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938473513.631, "dur": 7.137, + "args": { + "External id": 981390,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938473536.331, "dur": 63419.730, + "args": { + "External id": 981391,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938473550.041, "dur": 63392.143, + "args": { + "External id": 981392,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938473640.263, "dur": 18.566, + "args": { + "External id": 981393,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938473682.396, "dur": 63210.924, + "args": { + "External id": 981394,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938473685.831, "dur": 63206.456, + "args": { + "External id": 981395,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938473690.622, "dur": 11.601, + "args": { + "External id": 981396,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938473704.577, "dur": 63182.670, + "args": { + "External id": 981397,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938537117.662, "dur": 14.447, + "args": { + "External id": 981398,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938537122.079, "dur": 9.343, + "args": { + "External id": 981399,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938537167.652, "dur": 396.480, + "args": { + "External id": 981400,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938537197.418, "dur": 361.488, + "args": { + "External id": 981401,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3992, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938537210.829, "dur": 342.664, + "args": { + "External id": 981402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938537583.073, "dur": 2.456, + "args": { + "External id": 981403,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3994, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938537649.979, "dur": 7.234, + "args": { + "External id": 981404,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938537706.556, "dur": 1.565, + "args": { + "External id": 981405,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938537726.276, "dur": 3.714, + "args": { + "External id": 981406,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938537741.219, "dur": 0.941, + "args": { + "External id": 981407,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938537753.712, "dur": 1.043, + "args": { + "External id": 981408,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938537765.295, "dur": 1.125, + "args": { + "External id": 981409,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938537777.484, "dur": 3.354, + "args": { + "External id": 981410,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938537792.172, "dur": 2.148, + "args": { + "External id": 981411,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938537804.638, "dur": 1.014, + "args": { + "External id": 981412,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938537902.067, "dur": 3221.582, + "args": { + "External id": 981413,"Record function id": 0, "Ev Idx": 4004 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 2338706, "tid": 2379422, + "ts": 6345938537923.532, "dur": 1217.493, + "args": { + "External id": 981414,"Record function id": 0, "Ev Idx": 4005 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338706, "tid": 2379422, + "ts": 6345938537939.405, "dur": 414.090, + "args": { + "External id": 981415,"Record function id": 0, "Ev Idx": 4006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938538091.147, "dur": 5.309, + "args": { + "External id": 981416,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938538101.495, "dur": 1.139, + "args": { + "External id": 981417,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938538104.579, "dur": 3.279, + "args": { + "External id": 981418,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938538110.443, "dur": 1.010, + "args": { + "External id": 981419,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938538113.039, "dur": 1.155, + "args": { + "External id": 981420,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938538116.174, "dur": 1.052, + "args": { + "External id": 981421,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938538118.854, "dur": 2.494, + "args": { + "External id": 981422,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938538123.192, "dur": 1.063, + "args": { + "External id": 981423,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938538128.206, "dur": 0.905, + "args": { + "External id": 981424,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938538130.867, "dur": 0.634, + "args": { + "External id": 981425,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938538150.892, "dur": 167.453, + "args": { + "External id": 981426,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938538169.421, "dur": 144.251, + "args": { + "External id": 981427,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938538189.367, "dur": 19.558, + "args": { + "External id": 981428,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938538212.684, "dur": 70.170, + "args": { + "External id": 981429,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938538215.389, "dur": 67.183, + "args": { + "External id": 981430,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538219.129, "dur": 5.980, + "args": { + "External id": 981431,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938538228.818, "dur": 53.099, + "args": { + "External id": 981432,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4023 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 2338706, "tid": 2379422, + "ts": 6345938538449.765, "dur": 682.275, + "args": { + "External id": 981433,"Record function id": 0, "Ev Idx": 4024 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338706, "tid": 2379422, + "ts": 6345938538468.474, "dur": 648.344, + "args": { + "External id": 981434,"Record function id": 0, "Ev Idx": 4025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938538533.241, "dur": 6.532, + "args": { + "External id": 981435,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938538556.120, "dur": 31.744, + "args": { + "External id": 981436,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538561.546, "dur": 1.862, + "args": { + "External id": 981437,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538565.059, "dur": 0.685, + "args": { + "External id": 981438,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538566.897, "dur": 0.672, + "args": { + "External id": 981439,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538569.012, "dur": 0.490, + "args": { + "External id": 981440,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538571.463, "dur": 0.422, + "args": { + "External id": 981441,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538573.801, "dur": 2.395, + "args": { + "External id": 981442,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538578.265, "dur": 0.511, + "args": { + "External id": 981443,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538580.702, "dur": 0.314, + "args": { + "External id": 981444,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538582.827, "dur": 0.571, + "args": { + "External id": 981445,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938538598.756, "dur": 43.660, + "args": { + "External id": 981446,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345938538675.643, "dur": 131.926, + "args": { + "External id": 981447,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938538685.129, "dur": 3.847, + "args": { + "External id": 981448,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345938538694.351, "dur": 11.310, + "args": { + "External id": 981449,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345938538699.029, "dur": 6.196, + "args": { + "External id": 981450,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538703.032, "dur": 0.930, + "args": { + "External id": 981451,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938538712.121, "dur": 25.746, + "args": { + "External id": 981452,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538714.356, "dur": 0.543, + "args": { + "External id": 981453,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538716.496, "dur": 0.539, + "args": { + "External id": 981454,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538718.552, "dur": 2.483, + "args": { + "External id": 981455,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538722.565, "dur": 0.618, + "args": { + "External id": 981456,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538725.131, "dur": 0.331, + "args": { + "External id": 981457,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538726.759, "dur": 0.513, + "args": { + "External id": 981458,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538728.902, "dur": 0.462, + "args": { + "External id": 981459,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538731.010, "dur": 0.704, + "args": { + "External id": 981460,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938538733.408, "dur": 0.544, + "args": { + "External id": 981461,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938538767.819, "dur": 31.471, + "args": { + "External id": 981462,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938538853.998, "dur": 122.112, + "args": { + "External id": 981463,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938538884.552, "dur": 88.181, + "args": { + "External id": 981464,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4055, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938538894.801, "dur": 73.853, + "args": { + "External id": 981465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938538996.846, "dur": 2.292, + "args": { + "External id": 981466,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4057, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938539148.832, "dur": 1902.915, + "args": { + "External id": 981467,"Sequence number": 10552264, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4058 + } + }, + { + "ph": "f", "id": 202, "pid": 2338706, "tid": 2379422, "ts": 6345938539148.832, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938539270.481, "dur": 114.128, + "args": { + "External id": 981468,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938539431.545, "dur": 42.219, + "args": { + "External id": 981469,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345938539491.060, "dur": 49.918, + "args": { + "External id": 981470,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938539553.178, "dur": 31.915, + "args": { + "External id": 981471,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938539591.588, "dur": 34.054, + "args": { + "External id": 981472,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938539632.232, "dur": 27.297, + "args": { + "External id": 981473,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938539667.212, "dur": 29.605, + "args": { + "External id": 981474,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938539724.098, "dur": 25.175, + "args": { + "External id": 981475,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938539767.990, "dur": 30.169, + "args": { + "External id": 981476,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938539824.258, "dur": 21.722, + "args": { + "External id": 981477,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938539858.943, "dur": 16.446, + "args": { + "External id": 981478,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938539883.646, "dur": 34.952, + "args": { + "External id": 981479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938539922.066, "dur": 31.988, + "args": { + "External id": 981480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345938539982.988, "dur": 332.598, + "args": { + "External id": 981481,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938540134.086, "dur": 7.094, + "args": { + "External id": 981482,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938540143.465, "dur": 2.736, + "args": { + "External id": 981483,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938540147.566, "dur": 2.402, + "args": { + "External id": 981484,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938540151.359, "dur": 2.015, + "args": { + "External id": 981485,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938540202.423, "dur": 5.290, + "args": { + "External id": 981486,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938540204.722, "dur": 2.812, + "args": { + "External id": 981487,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938540209.990, "dur": 38.463, + "args": { + "External id": 981488,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938540218.554, "dur": 3.341, + "args": { + "External id": 981489,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938540249.897, "dur": 2.036, + "args": { + "External id": 981490,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938540251.159, "dur": 0.662, + "args": { + "External id": 981491,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938540253.230, "dur": 16.257, + "args": { + "External id": 981492,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938540255.337, "dur": 0.729, + "args": { + "External id": 981493,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938540356.235, "dur": 27.313, + "args": { + "External id": 981494,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938540401.425, "dur": 17.816, + "args": { + "External id": 981495,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938540427.240, "dur": 49.868, + "args": { + "External id": 981496,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938540483.232, "dur": 44.701, + "args": { + "External id": 981497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938540537.722, "dur": 39.390, + "args": { + "External id": 981498,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938540595.045, "dur": 34.721, + "args": { + "External id": 981499,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938540637.410, "dur": 28.231, + "args": { + "External id": 981500,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938540672.723, "dur": 32.460, + "args": { + "External id": 981501,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345938540731.702, "dur": 29.896, + "args": { + "External id": 981502,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938540781.413, "dur": 27.381, + "args": { + "External id": 981503,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938540824.823, "dur": 19.948, + "args": { + "External id": 981504,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938540946.547, "dur": 19.361, + "args": { + "External id": 981505,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345938540981.076, "dur": 18.786, + "args": { + "External id": 981506,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541149.430, "dur": 17.448, + "args": { + "External id": 981507,"Record function id": 0, "Ev Idx": 4098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541153.313, "dur": 12.471, + "args": { + "External id": 981508,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541158.149, "dur": 6.761, + "args": { + "External id": 981509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541160.108, "dur": 4.682, + "args": { + "External id": 981510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541170.781, "dur": 5.708, + "args": { + "External id": 981511,"Record function id": 0, "Ev Idx": 4102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541172.177, "dur": 3.821, + "args": { + "External id": 981512,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541173.030, "dur": 2.373, + "args": { + "External id": 981513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541173.970, "dur": 1.332, + "args": { + "External id": 981514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541179.747, "dur": 7.416, + "args": { + "External id": 981515,"Record function id": 0, "Ev Idx": 4106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541181.418, "dur": 5.196, + "args": { + "External id": 981516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541182.295, "dur": 3.884, + "args": { + "External id": 981517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541182.642, "dur": 3.476, + "args": { + "External id": 981518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541190.477, "dur": 4.096, + "args": { + "External id": 981519,"Record function id": 0, "Ev Idx": 4110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541191.656, "dur": 2.477, + "args": { + "External id": 981520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541192.175, "dur": 1.422, + "args": { + "External id": 981521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541192.725, "dur": 0.811, + "args": { + "External id": 981522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541197.799, "dur": 4.427, + "args": { + "External id": 981523,"Record function id": 0, "Ev Idx": 4114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541198.972, "dur": 2.826, + "args": { + "External id": 981524,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541199.534, "dur": 1.536, + "args": { + "External id": 981525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541200.219, "dur": 0.738, + "args": { + "External id": 981526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541205.397, "dur": 3.856, + "args": { + "External id": 981527,"Record function id": 0, "Ev Idx": 4118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541206.848, "dur": 1.971, + "args": { + "External id": 981528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541207.350, "dur": 1.028, + "args": { + "External id": 981529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541207.654, "dur": 0.642, + "args": { + "External id": 981530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541212.666, "dur": 4.391, + "args": { + "External id": 981531,"Record function id": 0, "Ev Idx": 4122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541214.290, "dur": 2.337, + "args": { + "External id": 981532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541215.172, "dur": 1.057, + "args": { + "External id": 981533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541215.450, "dur": 0.674, + "args": { + "External id": 981534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541220.334, "dur": 3.993, + "args": { + "External id": 981535,"Record function id": 0, "Ev Idx": 4126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541221.504, "dur": 2.326, + "args": { + "External id": 981536,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541222.011, "dur": 1.387, + "args": { + "External id": 981537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541222.527, "dur": 0.758, + "args": { + "External id": 981538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541230.846, "dur": 4.021, + "args": { + "External id": 981539,"Record function id": 0, "Ev Idx": 4130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938541232.157, "dur": 2.279, + "args": { + "External id": 981540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541232.690, "dur": 1.329, + "args": { + "External id": 981541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938541233.329, "dur": 0.631, + "args": { + "External id": 981542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938541238.725, "dur": 64259.094, + "args": { + "External id": 981543,"Record function id": 0, "Sequence number": 10552263, "Fwd thread id": 1, "Ev Idx": 4134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938541240.009, "dur": 64249.015, + "args": { + "External id": 981544,"Sequence number": 10552263, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4135 + } + }, + { + "ph": "f", "id": 203, "pid": 2338706, "tid": 2379422, "ts": 6345938541240.009, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 2338706, "tid": 2379422, + "ts": 6345938541273.865, "dur": 46.782, + "args": { + "External id": 981545,"Record function id": 0, "Ev Idx": 4136 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 2338706, "tid": 2379422, + "ts": 6345938541329.021, "dur": 70.185, + "args": { + "External id": 981546,"Record function id": 0, "Ev Idx": 4137 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 2338706, "tid": 2379422, + "ts": 6345938541404.934, "dur": 64075.453, + "args": { + "External id": 981547,"Record function id": 0, "Ev Idx": 4138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938541500.026, "dur": 8.022, + "args": { + "External id": 981548,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938541518.258, "dur": 6.634, + "args": { + "External id": 981549,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938541540.286, "dur": 63007.670, + "args": { + "External id": 981550,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938541555.443, "dur": 62977.664, + "args": { + "External id": 981551,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938541674.229, "dur": 18.395, + "args": { + "External id": 981552,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938541712.139, "dur": 62772.561, + "args": { + "External id": 981553,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938541715.479, "dur": 62768.085, + "args": { + "External id": 981554,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938541720.372, "dur": 11.132, + "args": { + "External id": 981555,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938541733.506, "dur": 62745.136, + "args": { + "External id": 981556,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938604685.154, "dur": 13.795, + "args": { + "External id": 981557,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938604689.090, "dur": 9.285, + "args": { + "External id": 981558,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938604731.445, "dur": 437.500, + "args": { + "External id": 981559,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938604767.043, "dur": 395.757, + "args": { + "External id": 981560,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4151, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938604779.145, "dur": 377.243, + "args": { + "External id": 981561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938605195.297, "dur": 2.661, + "args": { + "External id": 981562,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4153, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938605267.229, "dur": 6.959, + "args": { + "External id": 981563,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938605320.811, "dur": 1.868, + "args": { + "External id": 981564,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938605338.980, "dur": 3.760, + "args": { + "External id": 981565,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938605355.534, "dur": 1.026, + "args": { + "External id": 981566,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938605369.682, "dur": 1.033, + "args": { + "External id": 981567,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938605381.071, "dur": 0.906, + "args": { + "External id": 981568,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938605392.032, "dur": 2.984, + "args": { + "External id": 981569,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938605405.452, "dur": 2.264, + "args": { + "External id": 981570,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938605417.437, "dur": 1.023, + "args": { + "External id": 981571,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938605512.539, "dur": 3132.563, + "args": { + "External id": 981572,"Record function id": 0, "Ev Idx": 4163 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 2338706, "tid": 2379422, + "ts": 6345938605535.988, "dur": 1158.320, + "args": { + "External id": 981573,"Record function id": 0, "Ev Idx": 4164 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338706, "tid": 2379422, + "ts": 6345938605551.023, "dur": 337.505, + "args": { + "External id": 981574,"Record function id": 0, "Ev Idx": 4165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938605645.880, "dur": 4.029, + "args": { + "External id": 981575,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938605653.227, "dur": 0.914, + "args": { + "External id": 981576,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938605656.025, "dur": 3.491, + "args": { + "External id": 981577,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938605661.411, "dur": 0.830, + "args": { + "External id": 981578,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938605663.964, "dur": 0.821, + "args": { + "External id": 981579,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938605666.378, "dur": 0.756, + "args": { + "External id": 981580,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938605668.507, "dur": 2.532, + "args": { + "External id": 981581,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938605675.172, "dur": 0.741, + "args": { + "External id": 981582,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938605680.192, "dur": 0.623, + "args": { + "External id": 981583,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938605682.661, "dur": 0.969, + "args": { + "External id": 981584,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938605703.601, "dur": 154.731, + "args": { + "External id": 981585,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938605721.059, "dur": 132.960, + "args": { + "External id": 981586,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938605736.803, "dur": 18.250, + "args": { + "External id": 981587,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938605761.211, "dur": 64.730, + "args": { + "External id": 981588,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938605764.021, "dur": 61.630, + "args": { + "External id": 981589,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938605768.994, "dur": 5.381, + "args": { + "External id": 981590,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938605776.012, "dur": 49.104, + "args": { + "External id": 981591,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4182 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 2338706, "tid": 2379422, + "ts": 6345938605980.505, "dur": 705.416, + "args": { + "External id": 981592,"Record function id": 0, "Ev Idx": 4183 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338706, "tid": 2379422, + "ts": 6345938605998.262, "dur": 674.221, + "args": { + "External id": 981593,"Record function id": 0, "Ev Idx": 4184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938606126.219, "dur": 7.858, + "args": { + "External id": 981594,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938606151.262, "dur": 33.538, + "args": { + "External id": 981595,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606156.803, "dur": 1.728, + "args": { + "External id": 981596,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606161.169, "dur": 1.789, + "args": { + "External id": 981597,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606164.342, "dur": 0.533, + "args": { + "External id": 981598,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606166.261, "dur": 0.354, + "args": { + "External id": 981599,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606169.170, "dur": 0.320, + "args": { + "External id": 981600,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606171.203, "dur": 2.026, + "args": { + "External id": 981601,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606174.802, "dur": 0.335, + "args": { + "External id": 981602,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606177.111, "dur": 0.272, + "args": { + "External id": 981603,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606179.162, "dur": 0.334, + "args": { + "External id": 981604,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938606196.574, "dur": 49.078, + "args": { + "External id": 981605,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345938606280.300, "dur": 115.323, + "args": { + "External id": 981606,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938606290.873, "dur": 3.852, + "args": { + "External id": 981607,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345938606300.012, "dur": 10.846, + "args": { + "External id": 981608,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345938606304.521, "dur": 5.900, + "args": { + "External id": 981609,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606308.549, "dur": 0.586, + "args": { + "External id": 981610,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938606317.657, "dur": 25.081, + "args": { + "External id": 981611,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606320.018, "dur": 0.397, + "args": { + "External id": 981612,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606322.935, "dur": 0.314, + "args": { + "External id": 981613,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606324.692, "dur": 2.703, + "args": { + "External id": 981614,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606328.662, "dur": 0.577, + "args": { + "External id": 981615,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606330.789, "dur": 0.485, + "args": { + "External id": 981616,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606332.781, "dur": 0.378, + "args": { + "External id": 981617,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606335.228, "dur": 0.338, + "args": { + "External id": 981618,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606337.043, "dur": 0.362, + "args": { + "External id": 981619,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938606338.974, "dur": 0.382, + "args": { + "External id": 981620,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938606353.788, "dur": 33.787, + "args": { + "External id": 981621,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938606444.439, "dur": 151.900, + "args": { + "External id": 981622,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938606477.265, "dur": 115.382, + "args": { + "External id": 981623,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4214, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938606506.485, "dur": 81.813, + "args": { + "External id": 981624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938606618.324, "dur": 1.922, + "args": { + "External id": 981625,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4216, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938606702.124, "dur": 1920.136, + "args": { + "External id": 981626,"Sequence number": 10552262, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4217 + } + }, + { + "ph": "f", "id": 204, "pid": 2338706, "tid": 2379422, "ts": 6345938606702.124, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938606819.692, "dur": 105.246, + "args": { + "External id": 981627,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938606967.799, "dur": 63.060, + "args": { + "External id": 981628,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345938607052.021, "dur": 96.839, + "args": { + "External id": 981629,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938607165.775, "dur": 31.650, + "args": { + "External id": 981630,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938607204.359, "dur": 33.040, + "args": { + "External id": 981631,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938607244.113, "dur": 27.913, + "args": { + "External id": 981632,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938607279.922, "dur": 36.793, + "args": { + "External id": 981633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938607348.566, "dur": 26.005, + "args": { + "External id": 981634,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938607394.681, "dur": 27.575, + "args": { + "External id": 981635,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938607447.787, "dur": 20.369, + "args": { + "External id": 981636,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938607484.693, "dur": 16.492, + "args": { + "External id": 981637,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938607508.913, "dur": 36.028, + "args": { + "External id": 981638,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938607548.599, "dur": 34.484, + "args": { + "External id": 981639,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345938607616.496, "dur": 266.390, + "args": { + "External id": 981640,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938607699.268, "dur": 6.532, + "args": { + "External id": 981641,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938607708.531, "dur": 2.423, + "args": { + "External id": 981642,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938607712.406, "dur": 1.860, + "args": { + "External id": 981643,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938607715.620, "dur": 1.962, + "args": { + "External id": 981644,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938607762.507, "dur": 7.017, + "args": { + "External id": 981645,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938607765.213, "dur": 4.119, + "args": { + "External id": 981646,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938607773.834, "dur": 39.693, + "args": { + "External id": 981647,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938607780.282, "dur": 3.693, + "args": { + "External id": 981648,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938607815.108, "dur": 1.607, + "args": { + "External id": 981649,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938607816.038, "dur": 0.573, + "args": { + "External id": 981650,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938607817.970, "dur": 16.612, + "args": { + "External id": 981651,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938607820.254, "dur": 0.570, + "args": { + "External id": 981652,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938607922.056, "dur": 26.852, + "args": { + "External id": 981653,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938607968.448, "dur": 18.658, + "args": { + "External id": 981654,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938607994.735, "dur": 97.715, + "args": { + "External id": 981655,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938608102.500, "dur": 44.344, + "args": { + "External id": 981656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938608159.075, "dur": 31.273, + "args": { + "External id": 981657,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938608204.097, "dur": 43.043, + "args": { + "External id": 981658,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938608255.522, "dur": 27.449, + "args": { + "External id": 981659,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938608291.120, "dur": 32.396, + "args": { + "External id": 981660,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345938608360.507, "dur": 29.203, + "args": { + "External id": 981661,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938608410.828, "dur": 24.225, + "args": { + "External id": 981662,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938608455.697, "dur": 21.280, + "args": { + "External id": 981663,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938608541.423, "dur": 14.757, + "args": { + "External id": 981664,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345938608574.935, "dur": 15.775, + "args": { + "External id": 981665,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608669.377, "dur": 17.166, + "args": { + "External id": 981666,"Record function id": 0, "Ev Idx": 4257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608673.100, "dur": 12.284, + "args": { + "External id": 981667,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608677.623, "dur": 6.941, + "args": { + "External id": 981668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608679.094, "dur": 5.345, + "args": { + "External id": 981669,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608690.520, "dur": 5.590, + "args": { + "External id": 981670,"Record function id": 0, "Ev Idx": 4261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608692.501, "dur": 3.082, + "args": { + "External id": 981671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608693.460, "dur": 1.510, + "args": { + "External id": 981672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608694.023, "dur": 0.861, + "args": { + "External id": 981673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608699.736, "dur": 6.976, + "args": { + "External id": 981674,"Record function id": 0, "Ev Idx": 4265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608701.463, "dur": 4.788, + "args": { + "External id": 981675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608702.118, "dur": 3.755, + "args": { + "External id": 981676,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608702.686, "dur": 3.081, + "args": { + "External id": 981677,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608709.996, "dur": 4.828, + "args": { + "External id": 981678,"Record function id": 0, "Ev Idx": 4269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608711.537, "dur": 2.836, + "args": { + "External id": 981679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608712.277, "dur": 1.703, + "args": { + "External id": 981680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608712.993, "dur": 0.901, + "args": { + "External id": 981681,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608717.937, "dur": 3.841, + "args": { + "External id": 981682,"Record function id": 0, "Ev Idx": 4273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608719.249, "dur": 2.092, + "args": { + "External id": 981683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608719.920, "dur": 1.038, + "args": { + "External id": 981684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608720.210, "dur": 0.676, + "args": { + "External id": 981685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608724.877, "dur": 5.248, + "args": { + "External id": 981686,"Record function id": 0, "Ev Idx": 4277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608726.617, "dur": 3.111, + "args": { + "External id": 981687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608727.397, "dur": 1.792, + "args": { + "External id": 981688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608728.413, "dur": 0.709, + "args": { + "External id": 981689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608733.582, "dur": 4.167, + "args": { + "External id": 981690,"Record function id": 0, "Ev Idx": 4281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608734.962, "dur": 2.376, + "args": { + "External id": 981691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608735.837, "dur": 1.125, + "args": { + "External id": 981692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608736.157, "dur": 0.733, + "args": { + "External id": 981693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608740.997, "dur": 4.627, + "args": { + "External id": 981694,"Record function id": 0, "Ev Idx": 4285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608742.886, "dur": 2.293, + "args": { + "External id": 981695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608743.814, "dur": 0.958, + "args": { + "External id": 981696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608744.104, "dur": 0.594, + "args": { + "External id": 981697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608749.061, "dur": 5.976, + "args": { + "External id": 981698,"Record function id": 0, "Ev Idx": 4289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938608750.310, "dur": 4.306, + "args": { + "External id": 981699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608751.116, "dur": 3.122, + "args": { + "External id": 981700,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938608753.635, "dur": 0.528, + "args": { + "External id": 981701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938608759.589, "dur": 65884.698, + "args": { + "External id": 981702,"Record function id": 0, "Sequence number": 10552261, "Fwd thread id": 1, "Ev Idx": 4293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938608761.507, "dur": 65872.994, + "args": { + "External id": 981703,"Sequence number": 10552261, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4294 + } + }, + { + "ph": "f", "id": 205, "pid": 2338706, "tid": 2379422, "ts": 6345938608761.507, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 2338706, "tid": 2379422, + "ts": 6345938608790.917, "dur": 38.517, + "args": { + "External id": 981704,"Record function id": 0, "Ev Idx": 4295 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 2338706, "tid": 2379422, + "ts": 6345938608836.837, "dur": 66.242, + "args": { + "External id": 981705,"Record function id": 0, "Ev Idx": 4296 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 2338706, "tid": 2379422, + "ts": 6345938608912.084, "dur": 65714.902, + "args": { + "External id": 981706,"Record function id": 0, "Ev Idx": 4297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938609003.475, "dur": 25.657, + "args": { + "External id": 981707,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938609042.549, "dur": 6.879, + "args": { + "External id": 981708,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938609102.352, "dur": 64645.738, + "args": { + "External id": 981709,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938609117.684, "dur": 64615.761, + "args": { + "External id": 981710,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938609209.893, "dur": 19.917, + "args": { + "External id": 981711,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938609249.755, "dur": 64434.616, + "args": { + "External id": 981712,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938609252.592, "dur": 64430.594, + "args": { + "External id": 981713,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938609257.622, "dur": 14.057, + "args": { + "External id": 981714,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938609276.661, "dur": 64401.647, + "args": { + "External id": 981715,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938673861.091, "dur": 13.095, + "args": { + "External id": 981716,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938673864.611, "dur": 9.221, + "args": { + "External id": 981717,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938673904.652, "dur": 414.103, + "args": { + "External id": 981718,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938673940.024, "dur": 372.791, + "args": { + "External id": 981719,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4310, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938673953.569, "dur": 353.463, + "args": { + "External id": 981720,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938674344.970, "dur": 2.484, + "args": { + "External id": 981721,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4312, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938674414.104, "dur": 7.104, + "args": { + "External id": 981722,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938674468.082, "dur": 1.327, + "args": { + "External id": 981723,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938674486.013, "dur": 3.594, + "args": { + "External id": 981724,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938674501.935, "dur": 1.004, + "args": { + "External id": 981725,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938674513.697, "dur": 1.084, + "args": { + "External id": 981726,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938674526.184, "dur": 0.954, + "args": { + "External id": 981727,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938674540.363, "dur": 2.696, + "args": { + "External id": 981728,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938674553.360, "dur": 2.070, + "args": { + "External id": 981729,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938674566.015, "dur": 0.796, + "args": { + "External id": 981730,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938674659.495, "dur": 3028.736, + "args": { + "External id": 981731,"Record function id": 0, "Ev Idx": 4322 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 2338706, "tid": 2379422, + "ts": 6345938674680.984, "dur": 1133.490, + "args": { + "External id": 981732,"Record function id": 0, "Ev Idx": 4323 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338706, "tid": 2379422, + "ts": 6345938674695.823, "dur": 394.485, + "args": { + "External id": 981733,"Record function id": 0, "Ev Idx": 4324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938674787.086, "dur": 4.306, + "args": { + "External id": 981734,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938674794.868, "dur": 0.642, + "args": { + "External id": 981735,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938674797.157, "dur": 3.266, + "args": { + "External id": 981736,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938674802.355, "dur": 0.937, + "args": { + "External id": 981737,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938674807.301, "dur": 0.923, + "args": { + "External id": 981738,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938674809.641, "dur": 0.921, + "args": { + "External id": 981739,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938674812.545, "dur": 1.997, + "args": { + "External id": 981740,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938674816.005, "dur": 0.744, + "args": { + "External id": 981741,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938674820.574, "dur": 1.034, + "args": { + "External id": 981742,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938674823.294, "dur": 0.723, + "args": { + "External id": 981743,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938674843.250, "dur": 158.135, + "args": { + "External id": 981744,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938674859.857, "dur": 136.849, + "args": { + "External id": 981745,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938674881.265, "dur": 17.890, + "args": { + "External id": 981746,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938674903.011, "dur": 66.179, + "args": { + "External id": 981747,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938674905.765, "dur": 63.070, + "args": { + "External id": 981748,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938674909.666, "dur": 6.410, + "args": { + "External id": 981749,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938674920.018, "dur": 48.244, + "args": { + "External id": 981750,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 2338706, "tid": 2379422, + "ts": 6345938675194.033, "dur": 612.194, + "args": { + "External id": 981751,"Record function id": 0, "Ev Idx": 4342 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338706, "tid": 2379422, + "ts": 6345938675212.886, "dur": 580.405, + "args": { + "External id": 981752,"Record function id": 0, "Ev Idx": 4343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938675279.047, "dur": 6.393, + "args": { + "External id": 981753,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938675301.407, "dur": 36.100, + "args": { + "External id": 981754,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675307.052, "dur": 3.815, + "args": { + "External id": 981755,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675312.887, "dur": 0.508, + "args": { + "External id": 981756,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675314.704, "dur": 0.343, + "args": { + "External id": 981757,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675318.341, "dur": 0.364, + "args": { + "External id": 981758,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675320.178, "dur": 0.330, + "args": { + "External id": 981759,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675321.819, "dur": 2.766, + "args": { + "External id": 981760,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675326.955, "dur": 0.281, + "args": { + "External id": 981761,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675328.720, "dur": 0.382, + "args": { + "External id": 981762,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675330.393, "dur": 1.695, + "args": { + "External id": 981763,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938675349.904, "dur": 46.273, + "args": { + "External id": 981764,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345938675428.636, "dur": 121.204, + "args": { + "External id": 981765,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938675439.202, "dur": 4.040, + "args": { + "External id": 981766,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345938675449.145, "dur": 10.621, + "args": { + "External id": 981767,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345938675453.554, "dur": 5.769, + "args": { + "External id": 981768,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675457.415, "dur": 0.599, + "args": { + "External id": 981769,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938675467.091, "dur": 31.544, + "args": { + "External id": 981770,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675469.708, "dur": 0.406, + "args": { + "External id": 981771,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675471.950, "dur": 0.554, + "args": { + "External id": 981772,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675477.774, "dur": 3.599, + "args": { + "External id": 981773,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675483.091, "dur": 0.434, + "args": { + "External id": 981774,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675485.006, "dur": 0.380, + "args": { + "External id": 981775,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675488.160, "dur": 0.447, + "args": { + "External id": 981776,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675490.047, "dur": 0.369, + "args": { + "External id": 981777,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675492.196, "dur": 0.341, + "args": { + "External id": 981778,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938675494.585, "dur": 0.312, + "args": { + "External id": 981779,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938675511.815, "dur": 30.547, + "args": { + "External id": 981780,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938675593.673, "dur": 127.451, + "args": { + "External id": 981781,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938675626.714, "dur": 90.861, + "args": { + "External id": 981782,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4373, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938675636.557, "dur": 76.687, + "args": { + "External id": 981783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938675739.595, "dur": 1.765, + "args": { + "External id": 981784,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4375, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938675822.397, "dur": 1843.903, + "args": { + "External id": 981785,"Sequence number": 10552260, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4376 + } + }, + { + "ph": "f", "id": 206, "pid": 2338706, "tid": 2379422, "ts": 6345938675822.397, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938675937.437, "dur": 168.031, + "args": { + "External id": 981786,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938676178.866, "dur": 44.200, + "args": { + "External id": 981787,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345938676242.886, "dur": 60.060, + "args": { + "External id": 981788,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938676320.894, "dur": 33.131, + "args": { + "External id": 981789,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938676360.244, "dur": 35.345, + "args": { + "External id": 981790,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938676402.279, "dur": 26.271, + "args": { + "External id": 981791,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938676435.420, "dur": 28.530, + "args": { + "External id": 981792,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938676492.731, "dur": 23.222, + "args": { + "External id": 981793,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938676538.559, "dur": 29.234, + "args": { + "External id": 981794,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938676591.867, "dur": 19.389, + "args": { + "External id": 981795,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938676625.736, "dur": 14.469, + "args": { + "External id": 981796,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938676647.522, "dur": 35.264, + "args": { + "External id": 981797,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938676686.256, "dur": 31.591, + "args": { + "External id": 981798,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345938676750.071, "dur": 246.284, + "args": { + "External id": 981799,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938676831.613, "dur": 5.696, + "args": { + "External id": 981800,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938676839.371, "dur": 2.774, + "args": { + "External id": 981801,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938676843.630, "dur": 1.563, + "args": { + "External id": 981802,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938676846.645, "dur": 2.940, + "args": { + "External id": 981803,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938676894.450, "dur": 5.381, + "args": { + "External id": 981804,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938676896.560, "dur": 3.018, + "args": { + "External id": 981805,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938676902.016, "dur": 33.622, + "args": { + "External id": 981806,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938676907.579, "dur": 4.153, + "args": { + "External id": 981807,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938676937.132, "dur": 1.660, + "args": { + "External id": 981808,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938676938.098, "dur": 0.587, + "args": { + "External id": 981809,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938676940.314, "dur": 13.351, + "args": { + "External id": 981810,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938676942.166, "dur": 0.523, + "args": { + "External id": 981811,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938677095.163, "dur": 30.548, + "args": { + "External id": 981812,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938677148.706, "dur": 18.281, + "args": { + "External id": 981813,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938677175.511, "dur": 49.946, + "args": { + "External id": 981814,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938677231.938, "dur": 37.518, + "args": { + "External id": 981815,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938677279.581, "dur": 22.132, + "args": { + "External id": 981816,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938677307.526, "dur": 48.100, + "args": { + "External id": 981817,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938677371.473, "dur": 31.157, + "args": { + "External id": 981818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938677410.323, "dur": 29.355, + "args": { + "External id": 981819,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345938677468.434, "dur": 23.793, + "args": { + "External id": 981820,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938677509.549, "dur": 29.463, + "args": { + "External id": 981821,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938677557.167, "dur": 17.089, + "args": { + "External id": 981822,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938677595.410, "dur": 12.497, + "args": { + "External id": 981823,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345938677620.400, "dur": 14.759, + "args": { + "External id": 981824,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677711.621, "dur": 15.731, + "args": { + "External id": 981825,"Record function id": 0, "Ev Idx": 4416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677714.989, "dur": 11.203, + "args": { + "External id": 981826,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677719.491, "dur": 5.638, + "args": { + "External id": 981827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677721.094, "dur": 3.927, + "args": { + "External id": 981828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677731.567, "dur": 5.013, + "args": { + "External id": 981829,"Record function id": 0, "Ev Idx": 4420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677733.037, "dur": 3.057, + "args": { + "External id": 981830,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677733.972, "dur": 1.496, + "args": { + "External id": 981831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677734.494, "dur": 0.887, + "args": { + "External id": 981832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677739.919, "dur": 7.338, + "args": { + "External id": 981833,"Record function id": 0, "Ev Idx": 4424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677741.512, "dur": 5.290, + "args": { + "External id": 981834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677742.209, "dur": 4.119, + "args": { + "External id": 981835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677742.883, "dur": 3.349, + "args": { + "External id": 981836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677750.584, "dur": 4.523, + "args": { + "External id": 981837,"Record function id": 0, "Ev Idx": 4428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677752.051, "dur": 2.612, + "args": { + "External id": 981838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677752.718, "dur": 1.345, + "args": { + "External id": 981839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677753.184, "dur": 0.786, + "args": { + "External id": 981840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677758.314, "dur": 4.502, + "args": { + "External id": 981841,"Record function id": 0, "Ev Idx": 4432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677759.794, "dur": 2.590, + "args": { + "External id": 981842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677760.454, "dur": 1.276, + "args": { + "External id": 981843,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677760.973, "dur": 0.691, + "args": { + "External id": 981844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677766.056, "dur": 3.535, + "args": { + "External id": 981845,"Record function id": 0, "Ev Idx": 4436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677767.229, "dur": 1.925, + "args": { + "External id": 981846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677767.707, "dur": 1.032, + "args": { + "External id": 981847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677768.054, "dur": 0.620, + "args": { + "External id": 981848,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677772.947, "dur": 6.519, + "args": { + "External id": 981849,"Record function id": 0, "Ev Idx": 4440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677774.054, "dur": 4.980, + "args": { + "External id": 981850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677774.614, "dur": 3.835, + "args": { + "External id": 981851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677777.803, "dur": 0.580, + "args": { + "External id": 981852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677782.856, "dur": 4.059, + "args": { + "External id": 981853,"Record function id": 0, "Ev Idx": 4444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677784.028, "dur": 2.446, + "args": { + "External id": 981854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677784.926, "dur": 0.964, + "args": { + "External id": 981855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677785.209, "dur": 0.614, + "args": { + "External id": 981856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677790.103, "dur": 4.429, + "args": { + "External id": 981857,"Record function id": 0, "Ev Idx": 4448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938677791.527, "dur": 2.581, + "args": { + "External id": 981858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677792.200, "dur": 1.314, + "args": { + "External id": 981859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938677792.508, "dur": 0.931, + "args": { + "External id": 981860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938677799.038, "dur": 63756.021, + "args": { + "External id": 981861,"Record function id": 0, "Sequence number": 10552259, "Fwd thread id": 1, "Ev Idx": 4452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938677800.749, "dur": 63744.591, + "args": { + "External id": 981862,"Sequence number": 10552259, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4453 + } + }, + { + "ph": "f", "id": 207, "pid": 2338706, "tid": 2379422, "ts": 6345938677800.749, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 2338706, "tid": 2379422, + "ts": 6345938677831.023, "dur": 38.920, + "args": { + "External id": 981863,"Record function id": 0, "Ev Idx": 4454 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 2338706, "tid": 2379422, + "ts": 6345938677877.667, "dur": 69.021, + "args": { + "External id": 981864,"Record function id": 0, "Ev Idx": 4455 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 2338706, "tid": 2379422, + "ts": 6345938677953.341, "dur": 63583.855, + "args": { + "External id": 981865,"Record function id": 0, "Ev Idx": 4456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938678113.329, "dur": 9.040, + "args": { + "External id": 981866,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938678134.909, "dur": 7.285, + "args": { + "External id": 981867,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938678161.112, "dur": 62473.208, + "args": { + "External id": 981868,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938678176.072, "dur": 62444.080, + "args": { + "External id": 981869,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938678273.691, "dur": 18.921, + "args": { + "External id": 981870,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938678315.958, "dur": 62257.233, + "args": { + "External id": 981871,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938678318.923, "dur": 62253.210, + "args": { + "External id": 981872,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938678325.559, "dur": 9.990, + "args": { + "External id": 981873,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938678337.541, "dur": 62229.546, + "args": { + "External id": 981874,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938740748.221, "dur": 12.673, + "args": { + "External id": 981875,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938740751.816, "dur": 8.717, + "args": { + "External id": 981876,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938740793.954, "dur": 436.054, + "args": { + "External id": 981877,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938740828.051, "dur": 396.068, + "args": { + "External id": 981878,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4469, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938740840.757, "dur": 377.084, + "args": { + "External id": 981879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938741255.755, "dur": 2.130, + "args": { + "External id": 981880,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4471, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938741323.395, "dur": 7.327, + "args": { + "External id": 981881,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938741377.083, "dur": 1.194, + "args": { + "External id": 981882,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938741394.248, "dur": 3.600, + "args": { + "External id": 981883,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938741409.755, "dur": 0.793, + "args": { + "External id": 981884,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938741423.051, "dur": 0.897, + "args": { + "External id": 981885,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938741434.443, "dur": 0.892, + "args": { + "External id": 981886,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938741445.833, "dur": 3.129, + "args": { + "External id": 981887,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938741460.282, "dur": 2.233, + "args": { + "External id": 981888,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938741473.462, "dur": 0.887, + "args": { + "External id": 981889,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938741570.003, "dur": 3027.456, + "args": { + "External id": 981890,"Record function id": 0, "Ev Idx": 4481 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 2338706, "tid": 2379422, + "ts": 6345938741590.673, "dur": 1108.891, + "args": { + "External id": 981891,"Record function id": 0, "Ev Idx": 4482 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338706, "tid": 2379422, + "ts": 6345938741605.862, "dur": 332.983, + "args": { + "External id": 981892,"Record function id": 0, "Ev Idx": 4483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938741698.122, "dur": 3.978, + "args": { + "External id": 981893,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938741705.363, "dur": 0.811, + "args": { + "External id": 981894,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938741708.489, "dur": 2.997, + "args": { + "External id": 981895,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938741713.383, "dur": 0.900, + "args": { + "External id": 981896,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938741717.807, "dur": 0.805, + "args": { + "External id": 981897,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938741720.044, "dur": 0.720, + "args": { + "External id": 981898,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938741722.536, "dur": 1.664, + "args": { + "External id": 981899,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938741725.786, "dur": 0.745, + "args": { + "External id": 981900,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938741729.825, "dur": 0.798, + "args": { + "External id": 981901,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938741731.967, "dur": 1.067, + "args": { + "External id": 981902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938741751.579, "dur": 156.895, + "args": { + "External id": 981903,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938741768.723, "dur": 135.192, + "args": { + "External id": 981904,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938741788.715, "dur": 19.386, + "args": { + "External id": 981905,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938741811.743, "dur": 63.860, + "args": { + "External id": 981906,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938741816.418, "dur": 58.870, + "args": { + "External id": 981907,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938741820.530, "dur": 4.926, + "args": { + "External id": 981908,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938741827.119, "dur": 47.659, + "args": { + "External id": 981909,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4500 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 2338706, "tid": 2379422, + "ts": 6345938742050.657, "dur": 641.167, + "args": { + "External id": 981910,"Record function id": 0, "Ev Idx": 4501 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338706, "tid": 2379422, + "ts": 6345938742106.421, "dur": 572.741, + "args": { + "External id": 981911,"Record function id": 0, "Ev Idx": 4502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938742174.599, "dur": 6.638, + "args": { + "External id": 981912,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938742197.038, "dur": 36.945, + "args": { + "External id": 981913,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742202.838, "dur": 4.473, + "args": { + "External id": 981914,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742209.520, "dur": 0.484, + "args": { + "External id": 981915,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742211.470, "dur": 0.396, + "args": { + "External id": 981916,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742215.203, "dur": 0.361, + "args": { + "External id": 981917,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742217.189, "dur": 0.458, + "args": { + "External id": 981918,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742219.271, "dur": 2.530, + "args": { + "External id": 981919,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742224.140, "dur": 0.242, + "args": { + "External id": 981920,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742225.890, "dur": 0.301, + "args": { + "External id": 981921,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742227.605, "dur": 1.232, + "args": { + "External id": 981922,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938742245.757, "dur": 46.575, + "args": { + "External id": 981923,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345938742324.900, "dur": 112.958, + "args": { + "External id": 981924,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938742334.653, "dur": 3.847, + "args": { + "External id": 981925,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345938742343.675, "dur": 11.056, + "args": { + "External id": 981926,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345938742348.133, "dur": 6.162, + "args": { + "External id": 981927,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742352.438, "dur": 0.713, + "args": { + "External id": 981928,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938742362.016, "dur": 28.018, + "args": { + "External id": 981929,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742364.371, "dur": 0.454, + "args": { + "External id": 981930,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742366.707, "dur": 0.412, + "args": { + "External id": 981931,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742368.636, "dur": 3.375, + "args": { + "External id": 981932,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742373.568, "dur": 0.454, + "args": { + "External id": 981933,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742375.702, "dur": 0.371, + "args": { + "External id": 981934,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742378.645, "dur": 0.271, + "args": { + "External id": 981935,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742380.776, "dur": 0.585, + "args": { + "External id": 981936,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742382.917, "dur": 0.320, + "args": { + "External id": 981937,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938742386.117, "dur": 0.312, + "args": { + "External id": 981938,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938742399.717, "dur": 30.328, + "args": { + "External id": 981939,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938742482.926, "dur": 123.211, + "args": { + "External id": 981940,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938742513.546, "dur": 89.063, + "args": { + "External id": 981941,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4532, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938742523.353, "dur": 74.831, + "args": { + "External id": 981942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938742626.879, "dur": 1.890, + "args": { + "External id": 981943,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4534, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938742707.304, "dur": 1863.246, + "args": { + "External id": 981944,"Sequence number": 10552258, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4535 + } + }, + { + "ph": "f", "id": 208, "pid": 2338706, "tid": 2379422, "ts": 6345938742707.304, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938742820.855, "dur": 103.449, + "args": { + "External id": 981945,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938742967.858, "dur": 62.051, + "args": { + "External id": 981946,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345938743111.872, "dur": 64.530, + "args": { + "External id": 981947,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938743195.483, "dur": 33.764, + "args": { + "External id": 981948,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938743235.649, "dur": 31.869, + "args": { + "External id": 981949,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938743274.207, "dur": 31.707, + "args": { + "External id": 981950,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938743312.762, "dur": 28.536, + "args": { + "External id": 981951,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938743377.651, "dur": 27.449, + "args": { + "External id": 981952,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938743426.076, "dur": 29.099, + "args": { + "External id": 981953,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938743477.928, "dur": 19.654, + "args": { + "External id": 981954,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938743512.587, "dur": 14.513, + "args": { + "External id": 981955,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938743534.058, "dur": 34.897, + "args": { + "External id": 981956,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938743572.111, "dur": 31.662, + "args": { + "External id": 981957,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345938743636.004, "dur": 258.107, + "args": { + "External id": 981958,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938743714.902, "dur": 6.111, + "args": { + "External id": 981959,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938743723.261, "dur": 2.197, + "args": { + "External id": 981960,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938743726.871, "dur": 1.585, + "args": { + "External id": 981961,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938743729.766, "dur": 4.237, + "args": { + "External id": 981962,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938743784.717, "dur": 10.615, + "args": { + "External id": 981963,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938743792.313, "dur": 2.765, + "args": { + "External id": 981964,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938743797.650, "dur": 32.549, + "args": { + "External id": 981965,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938743803.136, "dur": 3.599, + "args": { + "External id": 981966,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938743831.561, "dur": 1.755, + "args": { + "External id": 981967,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938743832.436, "dur": 0.793, + "args": { + "External id": 981968,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938743834.196, "dur": 16.620, + "args": { + "External id": 981969,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938743838.676, "dur": 0.455, + "args": { + "External id": 981970,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938743935.119, "dur": 27.084, + "args": { + "External id": 981971,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938743981.916, "dur": 19.393, + "args": { + "External id": 981972,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938744028.096, "dur": 86.172, + "args": { + "External id": 981973,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938744125.288, "dur": 40.949, + "args": { + "External id": 981974,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938744177.964, "dur": 23.713, + "args": { + "External id": 981975,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938744207.290, "dur": 29.724, + "args": { + "External id": 981976,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938744243.439, "dur": 30.292, + "args": { + "External id": 981977,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938744282.337, "dur": 43.011, + "args": { + "External id": 981978,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345938744358.931, "dur": 28.587, + "args": { + "External id": 981979,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938744407.760, "dur": 24.900, + "args": { + "External id": 981980,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938744450.521, "dur": 16.865, + "args": { + "External id": 981981,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938744491.510, "dur": 14.021, + "args": { + "External id": 981982,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345938744518.522, "dur": 15.510, + "args": { + "External id": 981983,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744621.418, "dur": 20.218, + "args": { + "External id": 981984,"Record function id": 0, "Ev Idx": 4575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744625.970, "dur": 14.622, + "args": { + "External id": 981985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744630.400, "dur": 5.777, + "args": { + "External id": 981986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744631.745, "dur": 4.340, + "args": { + "External id": 981987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744645.656, "dur": 5.066, + "args": { + "External id": 981988,"Record function id": 0, "Ev Idx": 4579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744647.395, "dur": 2.750, + "args": { + "External id": 981989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744648.067, "dur": 1.586, + "args": { + "External id": 981990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744648.678, "dur": 0.901, + "args": { + "External id": 981991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744654.080, "dur": 7.532, + "args": { + "External id": 981992,"Record function id": 0, "Ev Idx": 4583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744655.821, "dur": 5.329, + "args": { + "External id": 981993,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744656.490, "dur": 4.235, + "args": { + "External id": 981994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744657.349, "dur": 3.280, + "args": { + "External id": 981995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744664.847, "dur": 4.593, + "args": { + "External id": 981996,"Record function id": 0, "Ev Idx": 4587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744666.514, "dur": 2.502, + "args": { + "External id": 981997,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744667.023, "dur": 1.591, + "args": { + "External id": 981998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744667.658, "dur": 0.886, + "args": { + "External id": 981999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744672.629, "dur": 3.936, + "args": { + "External id": 982000,"Record function id": 0, "Ev Idx": 4591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744673.908, "dur": 2.244, + "args": { + "External id": 982001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744674.627, "dur": 1.128, + "args": { + "External id": 982002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744675.070, "dur": 0.612, + "args": { + "External id": 982003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744679.760, "dur": 4.023, + "args": { + "External id": 982004,"Record function id": 0, "Ev Idx": 4595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744681.123, "dur": 2.222, + "args": { + "External id": 982005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744681.905, "dur": 1.040, + "args": { + "External id": 982006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744682.310, "dur": 0.562, + "args": { + "External id": 982007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744687.065, "dur": 6.567, + "args": { + "External id": 982008,"Record function id": 0, "Ev Idx": 4599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744688.584, "dur": 4.634, + "args": { + "External id": 982009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744689.260, "dur": 3.508, + "args": { + "External id": 982010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744691.997, "dur": 0.677, + "args": { + "External id": 982011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744696.816, "dur": 7.898, + "args": { + "External id": 982012,"Record function id": 0, "Ev Idx": 4603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744702.107, "dur": 2.188, + "args": { + "External id": 982013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744702.610, "dur": 1.289, + "args": { + "External id": 982014,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744703.099, "dur": 0.719, + "args": { + "External id": 982015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744707.789, "dur": 87.576, + "args": { + "External id": 982016,"Record function id": 0, "Ev Idx": 4607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938744791.184, "dur": 3.485, + "args": { + "External id": 982017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744792.694, "dur": 1.352, + "args": { + "External id": 982018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938744793.165, "dur": 0.804, + "args": { + "External id": 982019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938744800.680, "dur": 67706.824, + "args": { + "External id": 982020,"Record function id": 0, "Sequence number": 10552257, "Fwd thread id": 1, "Ev Idx": 4611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938744802.795, "dur": 67695.542, + "args": { + "External id": 982021,"Sequence number": 10552257, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4612 + } + }, + { + "ph": "f", "id": 209, "pid": 2338706, "tid": 2379422, "ts": 6345938744802.795, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 2338706, "tid": 2379422, + "ts": 6345938744834.772, "dur": 41.221, + "args": { + "External id": 982022,"Record function id": 0, "Ev Idx": 4613 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 2338706, "tid": 2379422, + "ts": 6345938744884.453, "dur": 68.423, + "args": { + "External id": 982023,"Record function id": 0, "Ev Idx": 4614 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 2338706, "tid": 2379422, + "ts": 6345938744959.060, "dur": 67531.075, + "args": { + "External id": 982024,"Record function id": 0, "Ev Idx": 4615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938745109.846, "dur": 9.694, + "args": { + "External id": 982025,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938745131.159, "dur": 7.673, + "args": { + "External id": 982026,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938745157.136, "dur": 66400.927, + "args": { + "External id": 982027,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938745171.865, "dur": 66371.835, + "args": { + "External id": 982028,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938745282.327, "dur": 18.456, + "args": { + "External id": 982029,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938745320.100, "dur": 66177.360, + "args": { + "External id": 982030,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938745322.832, "dur": 66173.424, + "args": { + "External id": 982031,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938745328.034, "dur": 12.800, + "args": { + "External id": 982032,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938745343.197, "dur": 66148.024, + "args": { + "External id": 982033,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938811672.955, "dur": 12.807, + "args": { + "External id": 982034,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938811676.676, "dur": 8.667, + "args": { + "External id": 982035,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938811720.176, "dur": 452.189, + "args": { + "External id": 982036,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938811755.625, "dur": 411.282, + "args": { + "External id": 982037,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4628, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938811769.517, "dur": 390.254, + "args": { + "External id": 982038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938812199.963, "dur": 2.658, + "args": { + "External id": 982039,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4630, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938812271.745, "dur": 7.096, + "args": { + "External id": 982040,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938812327.767, "dur": 1.500, + "args": { + "External id": 982041,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938812346.596, "dur": 3.138, + "args": { + "External id": 982042,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938812361.605, "dur": 0.733, + "args": { + "External id": 982043,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938812375.598, "dur": 0.959, + "args": { + "External id": 982044,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938812387.365, "dur": 0.880, + "args": { + "External id": 982045,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938812398.913, "dur": 3.037, + "args": { + "External id": 982046,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938812412.783, "dur": 2.333, + "args": { + "External id": 982047,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938812425.652, "dur": 0.838, + "args": { + "External id": 982048,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938812522.512, "dur": 3154.733, + "args": { + "External id": 982049,"Record function id": 0, "Ev Idx": 4640 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 2338706, "tid": 2379422, + "ts": 6345938812545.471, "dur": 1155.199, + "args": { + "External id": 982050,"Record function id": 0, "Ev Idx": 4641 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338706, "tid": 2379422, + "ts": 6345938812560.552, "dur": 342.661, + "args": { + "External id": 982051,"Record function id": 0, "Ev Idx": 4642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938812656.885, "dur": 4.492, + "args": { + "External id": 982052,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938812664.955, "dur": 0.860, + "args": { + "External id": 982053,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938812668.082, "dur": 3.199, + "args": { + "External id": 982054,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938812675.440, "dur": 0.639, + "args": { + "External id": 982055,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938812677.595, "dur": 0.744, + "args": { + "External id": 982056,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938812679.861, "dur": 0.793, + "args": { + "External id": 982057,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938812682.583, "dur": 1.892, + "args": { + "External id": 982058,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938812687.941, "dur": 0.664, + "args": { + "External id": 982059,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938812690.392, "dur": 1.023, + "args": { + "External id": 982060,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938812692.916, "dur": 0.660, + "args": { + "External id": 982061,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938812711.901, "dur": 162.033, + "args": { + "External id": 982062,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938812729.925, "dur": 139.138, + "args": { + "External id": 982063,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938812750.758, "dur": 18.019, + "args": { + "External id": 982064,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938812774.744, "dur": 66.623, + "args": { + "External id": 982065,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938812779.200, "dur": 61.760, + "args": { + "External id": 982066,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938812783.506, "dur": 5.412, + "args": { + "External id": 982067,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938812790.506, "dur": 49.803, + "args": { + "External id": 982068,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4659 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 2338706, "tid": 2379422, + "ts": 6345938812995.901, "dur": 697.590, + "args": { + "External id": 982069,"Record function id": 0, "Ev Idx": 4660 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338706, "tid": 2379422, + "ts": 6345938813034.863, "dur": 645.914, + "args": { + "External id": 982070,"Record function id": 0, "Ev Idx": 4661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938813146.547, "dur": 7.367, + "args": { + "External id": 982071,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938813171.540, "dur": 36.225, + "args": { + "External id": 982072,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813176.970, "dur": 2.622, + "args": { + "External id": 982073,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813183.298, "dur": 0.685, + "args": { + "External id": 982074,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813185.674, "dur": 0.569, + "args": { + "External id": 982075,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813187.824, "dur": 0.590, + "args": { + "External id": 982076,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813191.192, "dur": 0.318, + "args": { + "External id": 982077,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813193.079, "dur": 2.860, + "args": { + "External id": 982078,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813197.362, "dur": 1.943, + "args": { + "External id": 982079,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813201.010, "dur": 0.329, + "args": { + "External id": 982080,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813202.596, "dur": 0.378, + "args": { + "External id": 982081,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938813219.632, "dur": 51.703, + "args": { + "External id": 982082,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345938813304.119, "dur": 121.858, + "args": { + "External id": 982083,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938813314.795, "dur": 3.512, + "args": { + "External id": 982084,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345938813323.888, "dur": 10.726, + "args": { + "External id": 982085,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345938813328.373, "dur": 5.800, + "args": { + "External id": 982086,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813332.337, "dur": 0.597, + "args": { + "External id": 982087,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938813341.621, "dur": 30.518, + "args": { + "External id": 982088,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813344.177, "dur": 1.792, + "args": { + "External id": 982089,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813348.021, "dur": 0.453, + "args": { + "External id": 982090,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813350.192, "dur": 2.939, + "args": { + "External id": 982091,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813355.895, "dur": 0.348, + "args": { + "External id": 982092,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813357.663, "dur": 0.270, + "args": { + "External id": 982093,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813359.607, "dur": 0.442, + "args": { + "External id": 982094,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813362.441, "dur": 0.536, + "args": { + "External id": 982095,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813364.890, "dur": 0.622, + "args": { + "External id": 982096,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938813366.811, "dur": 1.439, + "args": { + "External id": 982097,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938813385.008, "dur": 32.858, + "args": { + "External id": 982098,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938813475.683, "dur": 129.480, + "args": { + "External id": 982099,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938813508.332, "dur": 93.369, + "args": { + "External id": 982100,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4691, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938813518.195, "dur": 79.177, + "args": { + "External id": 982101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938813625.676, "dur": 2.194, + "args": { + "External id": 982102,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4693, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938813709.030, "dur": 1940.753, + "args": { + "External id": 982103,"Sequence number": 10552256, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4694 + } + }, + { + "ph": "f", "id": 210, "pid": 2338706, "tid": 2379422, "ts": 6345938813709.030, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938813826.516, "dur": 107.375, + "args": { + "External id": 982104,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938813983.688, "dur": 109.662, + "args": { + "External id": 982105,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345938814116.559, "dur": 59.192, + "args": { + "External id": 982106,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938814189.011, "dur": 32.621, + "args": { + "External id": 982107,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938814254.335, "dur": 35.886, + "args": { + "External id": 982108,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938814297.061, "dur": 27.307, + "args": { + "External id": 982109,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938814333.148, "dur": 28.697, + "args": { + "External id": 982110,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938814395.597, "dur": 27.692, + "args": { + "External id": 982111,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938814458.105, "dur": 30.770, + "args": { + "External id": 982112,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938814513.968, "dur": 23.163, + "args": { + "External id": 982113,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938814551.366, "dur": 15.792, + "args": { + "External id": 982114,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938814576.478, "dur": 37.746, + "args": { + "External id": 982115,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938814618.699, "dur": 32.633, + "args": { + "External id": 982116,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345938814683.065, "dur": 260.121, + "args": { + "External id": 982117,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938814767.146, "dur": 6.454, + "args": { + "External id": 982118,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938814776.067, "dur": 2.845, + "args": { + "External id": 982119,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938814780.429, "dur": 4.890, + "args": { + "External id": 982120,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938814786.748, "dur": 2.111, + "args": { + "External id": 982121,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938814834.270, "dur": 7.764, + "args": { + "External id": 982122,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938814839.096, "dur": 2.715, + "args": { + "External id": 982123,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938814844.839, "dur": 33.836, + "args": { + "External id": 982124,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938814850.358, "dur": 3.837, + "args": { + "External id": 982125,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938814880.084, "dur": 1.915, + "args": { + "External id": 982126,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938814881.402, "dur": 0.507, + "args": { + "External id": 982127,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938814883.144, "dur": 17.971, + "args": { + "External id": 982128,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938814886.967, "dur": 0.748, + "args": { + "External id": 982129,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938814983.660, "dur": 46.394, + "args": { + "External id": 982130,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938815088.833, "dur": 24.427, + "args": { + "External id": 982131,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938815124.645, "dur": 49.402, + "args": { + "External id": 982132,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938815191.280, "dur": 50.299, + "args": { + "External id": 982133,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938815260.174, "dur": 20.523, + "args": { + "External id": 982134,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938815286.665, "dur": 29.662, + "args": { + "External id": 982135,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938815323.731, "dur": 33.653, + "args": { + "External id": 982136,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938815369.358, "dur": 30.510, + "args": { + "External id": 982137,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345938815426.402, "dur": 30.191, + "args": { + "External id": 982138,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938815476.373, "dur": 30.638, + "args": { + "External id": 982139,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938815526.665, "dur": 17.023, + "args": { + "External id": 982140,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938815569.268, "dur": 15.114, + "args": { + "External id": 982141,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345938815598.482, "dur": 18.431, + "args": { + "External id": 982142,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815700.716, "dur": 16.014, + "args": { + "External id": 982143,"Record function id": 0, "Ev Idx": 4734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815704.197, "dur": 11.396, + "args": { + "External id": 982144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815708.828, "dur": 5.769, + "args": { + "External id": 982145,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815710.667, "dur": 3.846, + "args": { + "External id": 982146,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815720.978, "dur": 8.597, + "args": { + "External id": 982147,"Record function id": 0, "Ev Idx": 4738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815725.991, "dur": 3.097, + "args": { + "External id": 982148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815727.033, "dur": 1.372, + "args": { + "External id": 982149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815727.587, "dur": 0.728, + "args": { + "External id": 982150,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815733.008, "dur": 7.962, + "args": { + "External id": 982151,"Record function id": 0, "Ev Idx": 4742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815735.336, "dur": 5.211, + "args": { + "External id": 982152,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815735.875, "dur": 4.113, + "args": { + "External id": 982153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815736.685, "dur": 3.180, + "args": { + "External id": 982154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815744.262, "dur": 4.312, + "args": { + "External id": 982155,"Record function id": 0, "Ev Idx": 4746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815745.684, "dur": 2.438, + "args": { + "External id": 982156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815746.383, "dur": 1.299, + "args": { + "External id": 982157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815746.727, "dur": 0.880, + "args": { + "External id": 982158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815751.797, "dur": 3.667, + "args": { + "External id": 982159,"Record function id": 0, "Ev Idx": 4750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815753.022, "dur": 2.014, + "args": { + "External id": 982160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815753.587, "dur": 0.973, + "args": { + "External id": 982161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815753.902, "dur": 0.592, + "args": { + "External id": 982162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815758.598, "dur": 4.063, + "args": { + "External id": 982163,"Record function id": 0, "Ev Idx": 4754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815760.029, "dur": 2.176, + "args": { + "External id": 982164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815760.742, "dur": 1.049, + "args": { + "External id": 982165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815761.090, "dur": 0.635, + "args": { + "External id": 982166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815765.843, "dur": 6.266, + "args": { + "External id": 982167,"Record function id": 0, "Ev Idx": 4758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815767.127, "dur": 4.570, + "args": { + "External id": 982168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815767.610, "dur": 3.657, + "args": { + "External id": 982169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815770.456, "dur": 0.698, + "args": { + "External id": 982170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815775.552, "dur": 3.964, + "args": { + "External id": 982171,"Record function id": 0, "Ev Idx": 4762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815777.017, "dur": 2.101, + "args": { + "External id": 982172,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815777.497, "dur": 1.221, + "args": { + "External id": 982173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815777.786, "dur": 0.841, + "args": { + "External id": 982174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815782.956, "dur": 5.353, + "args": { + "External id": 982175,"Record function id": 0, "Ev Idx": 4766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938815784.608, "dur": 3.278, + "args": { + "External id": 982176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815785.333, "dur": 1.844, + "args": { + "External id": 982177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938815785.830, "dur": 1.277, + "args": { + "External id": 982178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938815792.762, "dur": 63506.561, + "args": { + "External id": 982179,"Record function id": 0, "Sequence number": 10552255, "Fwd thread id": 1, "Ev Idx": 4770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938815794.465, "dur": 63495.384, + "args": { + "External id": 982180,"Sequence number": 10552255, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4771 + } + }, + { + "ph": "f", "id": 211, "pid": 2338706, "tid": 2379422, "ts": 6345938815794.465, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 2338706, "tid": 2379422, + "ts": 6345938815824.829, "dur": 38.866, + "args": { + "External id": 982181,"Record function id": 0, "Ev Idx": 4772 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 2338706, "tid": 2379422, + "ts": 6345938815871.516, "dur": 68.360, + "args": { + "External id": 982182,"Record function id": 0, "Ev Idx": 4773 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 2338706, "tid": 2379422, + "ts": 6345938815946.253, "dur": 63334.563, + "args": { + "External id": 982183,"Record function id": 0, "Ev Idx": 4774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938816091.531, "dur": 9.431, + "args": { + "External id": 982184,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938816114.664, "dur": 7.414, + "args": { + "External id": 982185,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938816142.683, "dur": 62175.446, + "args": { + "External id": 982186,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938816160.108, "dur": 62143.318, + "args": { + "External id": 982187,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938816253.431, "dur": 18.763, + "args": { + "External id": 982188,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938816291.691, "dur": 61963.522, + "args": { + "External id": 982189,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938816294.534, "dur": 61959.535, + "args": { + "External id": 982190,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938816299.320, "dur": 12.000, + "args": { + "External id": 982191,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938816313.884, "dur": 61934.770, + "args": { + "External id": 982192,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938878433.958, "dur": 13.542, + "args": { + "External id": 982193,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938878437.758, "dur": 9.287, + "args": { + "External id": 982194,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938878479.734, "dur": 436.342, + "args": { + "External id": 982195,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938878515.289, "dur": 395.436, + "args": { + "External id": 982196,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4787, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938878528.957, "dur": 376.286, + "args": { + "External id": 982197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938878939.873, "dur": 2.149, + "args": { + "External id": 982198,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4789, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879005.633, "dur": 25.587, + "args": { + "External id": 982199,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879114.793, "dur": 2.471, + "args": { + "External id": 982200,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879134.355, "dur": 4.067, + "args": { + "External id": 982201,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879151.301, "dur": 0.958, + "args": { + "External id": 982202,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879167.847, "dur": 0.858, + "args": { + "External id": 982203,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879179.809, "dur": 0.763, + "args": { + "External id": 982204,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879191.311, "dur": 2.398, + "args": { + "External id": 982205,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879203.929, "dur": 2.246, + "args": { + "External id": 982206,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879215.856, "dur": 0.942, + "args": { + "External id": 982207,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938879317.256, "dur": 3091.378, + "args": { + "External id": 982208,"Record function id": 0, "Ev Idx": 4799 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 2338706, "tid": 2379422, + "ts": 6345938879340.188, "dur": 1129.736, + "args": { + "External id": 982209,"Record function id": 0, "Ev Idx": 4800 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338706, "tid": 2379422, + "ts": 6345938879356.083, "dur": 340.481, + "args": { + "External id": 982210,"Record function id": 0, "Ev Idx": 4801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938879450.817, "dur": 4.145, + "args": { + "External id": 982211,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938879458.197, "dur": 1.059, + "args": { + "External id": 982212,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938879461.248, "dur": 3.187, + "args": { + "External id": 982213,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938879468.710, "dur": 0.731, + "args": { + "External id": 982214,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938879471.135, "dur": 0.917, + "args": { + "External id": 982215,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938879473.817, "dur": 1.157, + "args": { + "External id": 982216,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938879476.712, "dur": 1.916, + "args": { + "External id": 982217,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938879482.309, "dur": 0.833, + "args": { + "External id": 982218,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938879485.041, "dur": 0.729, + "args": { + "External id": 982219,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938879487.612, "dur": 0.566, + "args": { + "External id": 982220,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938879507.587, "dur": 159.935, + "args": { + "External id": 982221,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938879524.944, "dur": 137.404, + "args": { + "External id": 982222,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938879544.128, "dur": 18.368, + "args": { + "External id": 982223,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938879568.542, "dur": 66.251, + "args": { + "External id": 982224,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938879572.783, "dur": 61.673, + "args": { + "External id": 982225,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879577.197, "dur": 5.470, + "args": { + "External id": 982226,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938879584.317, "dur": 49.332, + "args": { + "External id": 982227,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4818 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 2338706, "tid": 2379422, + "ts": 6345938879784.889, "dur": 676.863, + "args": { + "External id": 982228,"Record function id": 0, "Ev Idx": 4819 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338706, "tid": 2379422, + "ts": 6345938879800.818, "dur": 648.019, + "args": { + "External id": 982229,"Record function id": 0, "Ev Idx": 4820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938879860.453, "dur": 4.570, + "args": { + "External id": 982230,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938879880.755, "dur": 35.037, + "args": { + "External id": 982231,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879886.148, "dur": 1.729, + "args": { + "External id": 982232,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879891.204, "dur": 0.511, + "args": { + "External id": 982233,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879893.518, "dur": 0.660, + "args": { + "External id": 982234,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879895.621, "dur": 0.404, + "args": { + "External id": 982235,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879898.490, "dur": 0.681, + "args": { + "External id": 982236,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879900.629, "dur": 2.569, + "args": { + "External id": 982237,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879905.208, "dur": 1.747, + "args": { + "External id": 982238,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879908.505, "dur": 0.613, + "args": { + "External id": 982239,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938879910.925, "dur": 0.546, + "args": { + "External id": 982240,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938879926.978, "dur": 42.943, + "args": { + "External id": 982241,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345938880002.760, "dur": 189.846, + "args": { + "External id": 982242,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938880031.405, "dur": 5.494, + "args": { + "External id": 982243,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345938880043.025, "dur": 48.923, + "args": { + "External id": 982244,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345938880047.341, "dur": 44.143, + "args": { + "External id": 982245,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938880051.628, "dur": 37.097, + "args": { + "External id": 982246,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938880102.368, "dur": 31.384, + "args": { + "External id": 982247,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938880105.660, "dur": 2.043, + "args": { + "External id": 982248,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938880109.182, "dur": 0.600, + "args": { + "External id": 982249,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938880111.695, "dur": 2.484, + "args": { + "External id": 982250,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938880116.918, "dur": 0.417, + "args": { + "External id": 982251,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938880119.306, "dur": 0.259, + "args": { + "External id": 982252,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938880121.232, "dur": 0.320, + "args": { + "External id": 982253,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938880124.589, "dur": 0.281, + "args": { + "External id": 982254,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938880126.218, "dur": 0.341, + "args": { + "External id": 982255,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938880128.253, "dur": 1.588, + "args": { + "External id": 982256,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938880145.593, "dur": 38.128, + "args": { + "External id": 982257,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938880242.528, "dur": 129.053, + "args": { + "External id": 982258,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938880275.030, "dur": 92.856, + "args": { + "External id": 982259,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4850, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938880284.499, "dur": 79.051, + "args": { + "External id": 982260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938880392.759, "dur": 1.841, + "args": { + "External id": 982261,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4852, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938880479.036, "dur": 1907.464, + "args": { + "External id": 982262,"Sequence number": 10552254, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4853 + } + }, + { + "ph": "f", "id": 212, "pid": 2338706, "tid": 2379422, "ts": 6345938880479.036, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938880594.253, "dur": 106.615, + "args": { + "External id": 982263,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938880800.565, "dur": 43.004, + "args": { + "External id": 982264,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345938880864.308, "dur": 52.164, + "args": { + "External id": 982265,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938880931.177, "dur": 31.350, + "args": { + "External id": 982266,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938880968.773, "dur": 33.153, + "args": { + "External id": 982267,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938881030.047, "dur": 66.368, + "args": { + "External id": 982268,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938881116.313, "dur": 34.211, + "args": { + "External id": 982269,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938881187.960, "dur": 26.320, + "args": { + "External id": 982270,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938881236.044, "dur": 31.058, + "args": { + "External id": 982271,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938881293.032, "dur": 19.633, + "args": { + "External id": 982272,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938881328.116, "dur": 14.607, + "args": { + "External id": 982273,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938881350.450, "dur": 36.008, + "args": { + "External id": 982274,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938881389.652, "dur": 32.469, + "args": { + "External id": 982275,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345938881455.483, "dur": 254.916, + "args": { + "External id": 982276,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938881538.112, "dur": 6.503, + "args": { + "External id": 982277,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938881547.025, "dur": 3.470, + "args": { + "External id": 982278,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938881552.045, "dur": 3.545, + "args": { + "External id": 982279,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938881557.359, "dur": 3.758, + "args": { + "External id": 982280,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938881603.325, "dur": 7.590, + "args": { + "External id": 982281,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938881608.050, "dur": 2.680, + "args": { + "External id": 982282,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938881613.290, "dur": 33.723, + "args": { + "External id": 982283,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938881619.178, "dur": 3.911, + "args": { + "External id": 982284,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938881648.745, "dur": 2.026, + "args": { + "External id": 982285,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938881649.955, "dur": 0.717, + "args": { + "External id": 982286,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938881651.857, "dur": 14.973, + "args": { + "External id": 982287,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938881654.349, "dur": 0.904, + "args": { + "External id": 982288,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938881748.616, "dur": 25.806, + "args": { + "External id": 982289,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938881794.420, "dur": 15.691, + "args": { + "External id": 982290,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938881817.282, "dur": 37.242, + "args": { + "External id": 982291,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938881860.825, "dur": 53.103, + "args": { + "External id": 982292,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938881931.278, "dur": 23.697, + "args": { + "External id": 982293,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938881960.850, "dur": 34.473, + "args": { + "External id": 982294,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938882002.125, "dur": 84.648, + "args": { + "External id": 982295,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938882098.591, "dur": 39.714, + "args": { + "External id": 982296,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345938882168.399, "dur": 29.805, + "args": { + "External id": 982297,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938882216.174, "dur": 32.790, + "args": { + "External id": 982298,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938882267.666, "dur": 18.270, + "args": { + "External id": 982299,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938882307.351, "dur": 15.331, + "args": { + "External id": 982300,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345938882339.288, "dur": 16.527, + "args": { + "External id": 982301,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882432.553, "dur": 17.469, + "args": { + "External id": 982302,"Record function id": 0, "Ev Idx": 4893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882437.328, "dur": 11.660, + "args": { + "External id": 982303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882442.051, "dur": 5.888, + "args": { + "External id": 982304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882443.659, "dur": 4.194, + "args": { + "External id": 982305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882453.991, "dur": 5.122, + "args": { + "External id": 982306,"Record function id": 0, "Ev Idx": 4897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882455.902, "dur": 2.740, + "args": { + "External id": 982307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882456.842, "dur": 1.357, + "args": { + "External id": 982308,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882457.383, "dur": 0.688, + "args": { + "External id": 982309,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882462.429, "dur": 7.680, + "args": { + "External id": 982310,"Record function id": 0, "Ev Idx": 4901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882464.384, "dur": 5.262, + "args": { + "External id": 982311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882465.207, "dur": 3.998, + "args": { + "External id": 982312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882465.799, "dur": 3.300, + "args": { + "External id": 982313,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882473.347, "dur": 4.782, + "args": { + "External id": 982314,"Record function id": 0, "Ev Idx": 4905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882475.159, "dur": 2.563, + "args": { + "External id": 982315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882475.659, "dur": 1.639, + "args": { + "External id": 982316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882476.344, "dur": 0.886, + "args": { + "External id": 982317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882481.452, "dur": 4.914, + "args": { + "External id": 982318,"Record function id": 0, "Ev Idx": 4909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882483.133, "dur": 2.778, + "args": { + "External id": 982319,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882483.980, "dur": 1.490, + "args": { + "External id": 982320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882484.682, "dur": 0.723, + "args": { + "External id": 982321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882489.599, "dur": 4.728, + "args": { + "External id": 982322,"Record function id": 0, "Ev Idx": 4913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882491.171, "dur": 2.742, + "args": { + "External id": 982323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882491.795, "dur": 1.660, + "args": { + "External id": 982324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882492.503, "dur": 0.880, + "args": { + "External id": 982325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882497.727, "dur": 3.939, + "args": { + "External id": 982326,"Record function id": 0, "Ev Idx": 4917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882499.142, "dur": 2.102, + "args": { + "External id": 982327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882499.622, "dur": 1.211, + "args": { + "External id": 982328,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882500.062, "dur": 0.704, + "args": { + "External id": 982329,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882504.779, "dur": 5.661, + "args": { + "External id": 982330,"Record function id": 0, "Ev Idx": 4921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882506.033, "dur": 3.993, + "args": { + "External id": 982331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882506.711, "dur": 2.827, + "args": { + "External id": 982332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882508.900, "dur": 0.573, + "args": { + "External id": 982333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882513.483, "dur": 4.528, + "args": { + "External id": 982334,"Record function id": 0, "Ev Idx": 4925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938882515.049, "dur": 2.527, + "args": { + "External id": 982335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882515.609, "dur": 1.411, + "args": { + "External id": 982336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938882516.002, "dur": 0.897, + "args": { + "External id": 982337,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938882522.613, "dur": 60183.846, + "args": { + "External id": 982338,"Record function id": 0, "Sequence number": 10552253, "Fwd thread id": 1, "Ev Idx": 4929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938882524.562, "dur": 60172.454, + "args": { + "External id": 982339,"Sequence number": 10552253, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4930 + } + }, + { + "ph": "f", "id": 213, "pid": 2338706, "tid": 2379422, "ts": 6345938882524.562, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 2338706, "tid": 2379422, + "ts": 6345938882556.444, "dur": 39.591, + "args": { + "External id": 982340,"Record function id": 0, "Ev Idx": 4931 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 2338706, "tid": 2379422, + "ts": 6345938882604.477, "dur": 70.735, + "args": { + "External id": 982341,"Record function id": 0, "Ev Idx": 4932 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 2338706, "tid": 2379422, + "ts": 6345938882681.740, "dur": 60006.310, + "args": { + "External id": 982342,"Record function id": 0, "Ev Idx": 4933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938882773.705, "dur": 6.508, + "args": { + "External id": 982343,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938882789.981, "dur": 7.059, + "args": { + "External id": 982344,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938882811.247, "dur": 58966.465, + "args": { + "External id": 982345,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938882826.032, "dur": 58937.461, + "args": { + "External id": 982346,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938882917.554, "dur": 18.868, + "args": { + "External id": 982347,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938882955.811, "dur": 58759.996, + "args": { + "External id": 982348,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938882962.933, "dur": 58751.769, + "args": { + "External id": 982349,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938882967.861, "dur": 12.907, + "args": { + "External id": 982350,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938882982.945, "dur": 58726.192, + "args": { + "External id": 982351,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938941892.137, "dur": 12.613, + "args": { + "External id": 982352,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938941895.743, "dur": 8.574, + "args": { + "External id": 982353,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938941937.792, "dur": 438.131, + "args": { + "External id": 982354,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938941973.234, "dur": 396.848, + "args": { + "External id": 982355,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4946, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938941986.367, "dur": 377.228, + "args": { + "External id": 982356,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938942402.622, "dur": 2.712, + "args": { + "External id": 982357,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4948, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938942472.039, "dur": 7.494, + "args": { + "External id": 982358,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938942525.525, "dur": 1.452, + "args": { + "External id": 982359,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938942543.677, "dur": 3.240, + "args": { + "External id": 982360,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938942559.690, "dur": 0.925, + "args": { + "External id": 982361,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938942572.421, "dur": 1.180, + "args": { + "External id": 982362,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938942583.889, "dur": 0.644, + "args": { + "External id": 982363,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938942597.408, "dur": 3.045, + "args": { + "External id": 982364,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938942611.716, "dur": 2.536, + "args": { + "External id": 982365,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938942624.515, "dur": 1.085, + "args": { + "External id": 982366,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938942722.749, "dur": 3084.716, + "args": { + "External id": 982367,"Record function id": 0, "Ev Idx": 4958 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 2338706, "tid": 2379422, + "ts": 6345938942747.586, "dur": 1139.623, + "args": { + "External id": 982368,"Record function id": 0, "Ev Idx": 4959 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338706, "tid": 2379422, + "ts": 6345938942763.467, "dur": 412.387, + "args": { + "External id": 982369,"Record function id": 0, "Ev Idx": 4960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938942857.048, "dur": 4.589, + "args": { + "External id": 982370,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938942865.244, "dur": 0.629, + "args": { + "External id": 982371,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938942868.066, "dur": 2.801, + "args": { + "External id": 982372,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938942872.557, "dur": 0.952, + "args": { + "External id": 982373,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938942877.623, "dur": 0.960, + "args": { + "External id": 982374,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938942880.276, "dur": 0.813, + "args": { + "External id": 982375,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938942882.934, "dur": 2.098, + "args": { + "External id": 982376,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938942886.701, "dur": 0.888, + "args": { + "External id": 982377,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938942891.347, "dur": 1.066, + "args": { + "External id": 982378,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938942894.490, "dur": 0.666, + "args": { + "External id": 982379,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938942913.188, "dur": 224.236, + "args": { + "External id": 982380,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938942931.083, "dur": 199.454, + "args": { + "External id": 982381,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938942952.481, "dur": 18.893, + "args": { + "External id": 982382,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938942974.782, "dur": 123.727, + "args": { + "External id": 982383,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938942979.713, "dur": 118.426, + "args": { + "External id": 982384,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938942984.019, "dur": 5.770, + "args": { + "External id": 982385,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938942991.370, "dur": 105.476, + "args": { + "External id": 982386,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4977 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 2338706, "tid": 2379422, + "ts": 6345938943277.664, "dur": 601.058, + "args": { + "External id": 982387,"Record function id": 0, "Ev Idx": 4978 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338706, "tid": 2379422, + "ts": 6345938943298.054, "dur": 567.690, + "args": { + "External id": 982388,"Record function id": 0, "Ev Idx": 4979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938943363.822, "dur": 5.910, + "args": { + "External id": 982389,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938943385.148, "dur": 36.905, + "args": { + "External id": 982390,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943390.962, "dur": 2.820, + "args": { + "External id": 982391,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943396.006, "dur": 0.668, + "args": { + "External id": 982392,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943398.085, "dur": 0.638, + "args": { + "External id": 982393,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943402.116, "dur": 0.440, + "args": { + "External id": 982394,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943403.810, "dur": 0.368, + "args": { + "External id": 982395,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943405.929, "dur": 2.901, + "args": { + "External id": 982396,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943411.577, "dur": 0.328, + "args": { + "External id": 982397,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943413.287, "dur": 0.487, + "args": { + "External id": 982398,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943415.431, "dur": 1.929, + "args": { + "External id": 982399,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938943434.045, "dur": 45.018, + "args": { + "External id": 982400,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345938943510.758, "dur": 121.104, + "args": { + "External id": 982401,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938943521.565, "dur": 3.654, + "args": { + "External id": 982402,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345938943530.794, "dur": 10.510, + "args": { + "External id": 982403,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345938943535.293, "dur": 5.582, + "args": { + "External id": 982404,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943539.130, "dur": 0.599, + "args": { + "External id": 982405,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345938943548.162, "dur": 29.397, + "args": { + "External id": 982406,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943550.410, "dur": 0.441, + "args": { + "External id": 982407,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943553.484, "dur": 0.385, + "args": { + "External id": 982408,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943555.299, "dur": 3.235, + "args": { + "External id": 982409,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943560.208, "dur": 0.548, + "args": { + "External id": 982410,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943562.108, "dur": 0.385, + "args": { + "External id": 982411,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943566.321, "dur": 0.347, + "args": { + "External id": 982412,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943568.241, "dur": 0.395, + "args": { + "External id": 982413,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943570.373, "dur": 0.390, + "args": { + "External id": 982414,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938943573.218, "dur": 0.486, + "args": { + "External id": 982415,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938943590.632, "dur": 33.352, + "args": { + "External id": 982416,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345938943674.489, "dur": 121.274, + "args": { + "External id": 982417,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938943704.206, "dur": 88.032, + "args": { + "External id": 982418,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5009, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345938943713.457, "dur": 74.447, + "args": { + "External id": 982419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345938943815.547, "dur": 1.796, + "args": { + "External id": 982420,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5011, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938943895.014, "dur": 1883.247, + "args": { + "External id": 982421,"Sequence number": 10552252, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5012 + } + }, + { + "ph": "f", "id": 214, "pid": 2338706, "tid": 2379422, "ts": 6345938943895.014, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938944030.903, "dur": 147.518, + "args": { + "External id": 982422,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938944231.660, "dur": 45.334, + "args": { + "External id": 982423,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345938944297.612, "dur": 50.379, + "args": { + "External id": 982424,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938944361.193, "dur": 31.429, + "args": { + "External id": 982425,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938944398.956, "dur": 32.585, + "args": { + "External id": 982426,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938944437.807, "dur": 26.793, + "args": { + "External id": 982427,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938944472.524, "dur": 28.793, + "args": { + "External id": 982428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938944528.569, "dur": 23.821, + "args": { + "External id": 982429,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345938944597.079, "dur": 29.290, + "args": { + "External id": 982430,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938944652.374, "dur": 19.242, + "args": { + "External id": 982431,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938944686.922, "dur": 16.554, + "args": { + "External id": 982432,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938944711.753, "dur": 36.954, + "args": { + "External id": 982433,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938944752.217, "dur": 32.253, + "args": { + "External id": 982434,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345938944817.835, "dur": 323.693, + "args": { + "External id": 982435,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938944901.623, "dur": 7.251, + "args": { + "External id": 982436,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938944911.026, "dur": 2.947, + "args": { + "External id": 982437,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938944915.661, "dur": 3.353, + "args": { + "External id": 982438,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938944920.258, "dur": 2.193, + "args": { + "External id": 982439,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938944964.910, "dur": 5.457, + "args": { + "External id": 982440,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938944967.211, "dur": 2.953, + "args": { + "External id": 982441,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938944974.853, "dur": 54.041, + "args": { + "External id": 982442,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938944982.132, "dur": 3.826, + "args": { + "External id": 982443,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345938945031.891, "dur": 2.326, + "args": { + "External id": 982444,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938945033.093, "dur": 1.037, + "args": { + "External id": 982445,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345938945035.370, "dur": 55.081, + "args": { + "External id": 982446,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938945039.978, "dur": 0.738, + "args": { + "External id": 982447,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345938945188.431, "dur": 30.599, + "args": { + "External id": 982448,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938945240.913, "dur": 18.843, + "args": { + "External id": 982449,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938945268.078, "dur": 52.827, + "args": { + "External id": 982450,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938945327.423, "dur": 39.840, + "args": { + "External id": 982451,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938945389.167, "dur": 33.213, + "args": { + "External id": 982452,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938945432.428, "dur": 32.152, + "args": { + "External id": 982453,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938945472.932, "dur": 27.083, + "args": { + "External id": 982454,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345938945507.289, "dur": 33.157, + "args": { + "External id": 982455,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345938945564.552, "dur": 29.951, + "args": { + "External id": 982456,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938945615.095, "dur": 25.517, + "args": { + "External id": 982457,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345938945661.414, "dur": 16.845, + "args": { + "External id": 982458,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345938945695.323, "dur": 13.665, + "args": { + "External id": 982459,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345938945728.302, "dur": 19.629, + "args": { + "External id": 982460,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945831.668, "dur": 18.040, + "args": { + "External id": 982461,"Record function id": 0, "Ev Idx": 5052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945835.561, "dur": 13.155, + "args": { + "External id": 982462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945840.259, "dur": 7.507, + "args": { + "External id": 982463,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945843.681, "dur": 3.966, + "args": { + "External id": 982464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945853.743, "dur": 5.207, + "args": { + "External id": 982465,"Record function id": 0, "Ev Idx": 5056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945855.436, "dur": 3.043, + "args": { + "External id": 982466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945856.292, "dur": 1.477, + "args": { + "External id": 982467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945856.974, "dur": 0.685, + "args": { + "External id": 982468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945862.422, "dur": 6.703, + "args": { + "External id": 982469,"Record function id": 0, "Ev Idx": 5060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945863.714, "dur": 4.942, + "args": { + "External id": 982470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945864.271, "dur": 3.916, + "args": { + "External id": 982471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945865.015, "dur": 3.027, + "args": { + "External id": 982472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945872.446, "dur": 4.403, + "args": { + "External id": 982473,"Record function id": 0, "Ev Idx": 5064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945873.968, "dur": 2.469, + "args": { + "External id": 982474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945874.707, "dur": 1.173, + "args": { + "External id": 982475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945875.016, "dur": 0.763, + "args": { + "External id": 982476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945880.031, "dur": 4.499, + "args": { + "External id": 982477,"Record function id": 0, "Ev Idx": 5068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945881.708, "dur": 2.406, + "args": { + "External id": 982478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945882.379, "dur": 1.308, + "args": { + "External id": 982479,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945882.917, "dur": 0.707, + "args": { + "External id": 982480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945887.627, "dur": 4.207, + "args": { + "External id": 982481,"Record function id": 0, "Ev Idx": 5072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945889.079, "dur": 2.319, + "args": { + "External id": 982482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945889.716, "dur": 1.200, + "args": { + "External id": 982483,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945890.194, "dur": 0.658, + "args": { + "External id": 982484,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945895.131, "dur": 5.337, + "args": { + "External id": 982485,"Record function id": 0, "Ev Idx": 5076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945896.838, "dur": 3.199, + "args": { + "External id": 982486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945897.728, "dur": 1.666, + "args": { + "External id": 982487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945898.435, "dur": 0.894, + "args": { + "External id": 982488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945903.828, "dur": 4.078, + "args": { + "External id": 982489,"Record function id": 0, "Ev Idx": 5080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945905.103, "dur": 2.398, + "args": { + "External id": 982490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945905.953, "dur": 1.143, + "args": { + "External id": 982491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945906.420, "dur": 0.611, + "args": { + "External id": 982492,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945911.310, "dur": 7.381, + "args": { + "External id": 982493,"Record function id": 0, "Ev Idx": 5084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345938945912.770, "dur": 5.505, + "args": { + "External id": 982494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945913.230, "dur": 4.380, + "args": { + "External id": 982495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345938945916.850, "dur": 0.686, + "args": { + "External id": 982496,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938945923.367, "dur": 61351.116, + "args": { + "External id": 982497,"Record function id": 0, "Sequence number": 10552251, "Fwd thread id": 1, "Ev Idx": 5088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345938945925.139, "dur": 61340.091, + "args": { + "External id": 982498,"Sequence number": 10552251, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5089 + } + }, + { + "ph": "f", "id": 215, "pid": 2338706, "tid": 2379422, "ts": 6345938945925.139, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 2338706, "tid": 2379422, + "ts": 6345938945956.520, "dur": 40.563, + "args": { + "External id": 982499,"Record function id": 0, "Ev Idx": 5090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 2338706, "tid": 2379422, + "ts": 6345938946005.184, "dur": 145.766, + "args": { + "External id": 982500,"Record function id": 0, "Ev Idx": 5091 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 2338706, "tid": 2379422, + "ts": 6345938946161.261, "dur": 61095.006, + "args": { + "External id": 982501,"Record function id": 0, "Ev Idx": 5092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938946262.085, "dur": 8.507, + "args": { + "External id": 982502,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345938946284.276, "dur": 7.605, + "args": { + "External id": 982503,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938946308.025, "dur": 59979.155, + "args": { + "External id": 982504,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345938946322.909, "dur": 59949.818, + "args": { + "External id": 982505,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345938946414.312, "dur": 18.711, + "args": { + "External id": 982506,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345938946452.506, "dur": 59769.463, + "args": { + "External id": 982507,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345938946455.703, "dur": 59765.204, + "args": { + "External id": 982508,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345938946462.503, "dur": 10.551, + "args": { + "External id": 982509,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345938946475.587, "dur": 59739.870, + "args": { + "External id": 982510,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939006404.628, "dur": 12.876, + "args": { + "External id": 982511,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939006408.563, "dur": 8.457, + "args": { + "External id": 982512,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345939006449.132, "dur": 437.575, + "args": { + "External id": 982513,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939006485.151, "dur": 396.095, + "args": { + "External id": 982514,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5105, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345939006499.671, "dur": 375.188, + "args": { + "External id": 982515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939006910.488, "dur": 2.211, + "args": { + "External id": 982516,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5107, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939006974.880, "dur": 6.856, + "args": { + "External id": 982517,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939007049.758, "dur": 2.351, + "args": { + "External id": 982518,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939007105.060, "dur": 4.325, + "args": { + "External id": 982519,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939007122.766, "dur": 1.149, + "args": { + "External id": 982520,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939007137.340, "dur": 1.082, + "args": { + "External id": 982521,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939007149.114, "dur": 1.309, + "args": { + "External id": 982522,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939007161.155, "dur": 3.010, + "args": { + "External id": 982523,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939007178.116, "dur": 2.196, + "args": { + "External id": 982524,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939007190.215, "dur": 0.978, + "args": { + "External id": 982525,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939007292.177, "dur": 3173.037, + "args": { + "External id": 982526,"Record function id": 0, "Ev Idx": 5117 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 2338706, "tid": 2379422, + "ts": 6345939007314.394, "dur": 1254.101, + "args": { + "External id": 982527,"Record function id": 0, "Ev Idx": 5118 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338706, "tid": 2379422, + "ts": 6345939007329.840, "dur": 441.964, + "args": { + "External id": 982528,"Record function id": 0, "Ev Idx": 5119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939007516.404, "dur": 5.144, + "args": { + "External id": 982529,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939007525.961, "dur": 1.192, + "args": { + "External id": 982530,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939007528.946, "dur": 3.099, + "args": { + "External id": 982531,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939007536.058, "dur": 0.840, + "args": { + "External id": 982532,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939007538.803, "dur": 0.748, + "args": { + "External id": 982533,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939007541.351, "dur": 0.746, + "args": { + "External id": 982534,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939007543.974, "dur": 1.958, + "args": { + "External id": 982535,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939007549.281, "dur": 0.820, + "args": { + "External id": 982536,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939007551.726, "dur": 0.754, + "args": { + "External id": 982537,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939007553.961, "dur": 0.800, + "args": { + "External id": 982538,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939007574.828, "dur": 162.794, + "args": { + "External id": 982539,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939007592.321, "dur": 140.321, + "args": { + "External id": 982540,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939007613.396, "dur": 16.236, + "args": { + "External id": 982541,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345939007633.291, "dur": 70.691, + "args": { + "External id": 982542,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939007636.247, "dur": 67.294, + "args": { + "External id": 982543,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939007640.556, "dur": 5.903, + "args": { + "External id": 982544,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345939007647.924, "dur": 54.939, + "args": { + "External id": 982545,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5136 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 2338706, "tid": 2379422, + "ts": 6345939007869.811, "dur": 689.960, + "args": { + "External id": 982546,"Record function id": 0, "Ev Idx": 5137 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338706, "tid": 2379422, + "ts": 6345939007889.575, "dur": 656.282, + "args": { + "External id": 982547,"Record function id": 0, "Ev Idx": 5138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939007955.773, "dur": 4.931, + "args": { + "External id": 982548,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345939007976.287, "dur": 55.888, + "args": { + "External id": 982549,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939007981.873, "dur": 1.796, + "args": { + "External id": 982550,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939007985.809, "dur": 1.757, + "args": { + "External id": 982551,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939007988.996, "dur": 0.633, + "args": { + "External id": 982552,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939007991.459, "dur": 0.379, + "args": { + "External id": 982553,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939007995.133, "dur": 0.528, + "args": { + "External id": 982554,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939007997.147, "dur": 2.686, + "args": { + "External id": 982555,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939008001.552, "dur": 1.047, + "args": { + "External id": 982556,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939008005.703, "dur": 0.469, + "args": { + "External id": 982557,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939008026.129, "dur": 0.623, + "args": { + "External id": 982558,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345939008044.201, "dur": 82.756, + "args": { + "External id": 982559,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345939008164.850, "dur": 127.187, + "args": { + "External id": 982560,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 5151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939008177.192, "dur": 4.762, + "args": { + "External id": 982561,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345939008187.550, "dur": 11.443, + "args": { + "External id": 982562,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345939008191.963, "dur": 6.613, + "args": { + "External id": 982563,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 5154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939008196.123, "dur": 1.009, + "args": { + "External id": 982564,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345939008206.685, "dur": 31.018, + "args": { + "External id": 982565,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939008209.472, "dur": 0.394, + "args": { + "External id": 982566,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939008212.712, "dur": 0.591, + "args": { + "External id": 982567,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939008215.265, "dur": 2.527, + "args": { + "External id": 982568,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939008219.593, "dur": 1.489, + "args": { + "External id": 982569,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939008223.005, "dur": 0.524, + "args": { + "External id": 982570,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939008225.136, "dur": 0.847, + "args": { + "External id": 982571,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939008228.987, "dur": 0.274, + "args": { + "External id": 982572,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939008231.228, "dur": 0.522, + "args": { + "External id": 982573,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939008233.670, "dur": 0.309, + "args": { + "External id": 982574,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345939008253.211, "dur": 31.193, + "args": { + "External id": 982575,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345939008343.392, "dur": 128.452, + "args": { + "External id": 982576,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939008378.153, "dur": 90.221, + "args": { + "External id": 982577,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5168, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345939008389.125, "dur": 74.980, + "args": { + "External id": 982578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939008492.031, "dur": 1.891, + "args": { + "External id": 982579,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5170, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939008575.632, "dur": 1865.692, + "args": { + "External id": 982580,"Sequence number": 10552250, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5171 + } + }, + { + "ph": "f", "id": 216, "pid": 2338706, "tid": 2379422, "ts": 6345939008575.632, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939008694.325, "dur": 105.899, + "args": { + "External id": 982581,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345939008845.155, "dur": 43.149, + "args": { + "External id": 982582,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345939008906.329, "dur": 49.401, + "args": { + "External id": 982583,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939008967.870, "dur": 31.789, + "args": { + "External id": 982584,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939009006.328, "dur": 97.651, + "args": { + "External id": 982585,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939009115.106, "dur": 35.889, + "args": { + "External id": 982586,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939009159.874, "dur": 32.915, + "args": { + "External id": 982587,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345939009224.015, "dur": 28.656, + "args": { + "External id": 982588,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345939009274.100, "dur": 28.586, + "args": { + "External id": 982589,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345939009329.189, "dur": 19.528, + "args": { + "External id": 982590,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345939009362.797, "dur": 16.277, + "args": { + "External id": 982591,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939009408.450, "dur": 39.412, + "args": { + "External id": 982592,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939009451.897, "dur": 32.762, + "args": { + "External id": 982593,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345939009518.307, "dur": 255.911, + "args": { + "External id": 982594,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939009600.681, "dur": 5.996, + "args": { + "External id": 982595,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939009609.081, "dur": 2.775, + "args": { + "External id": 982596,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939009613.627, "dur": 3.471, + "args": { + "External id": 982597,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939009618.494, "dur": 2.503, + "args": { + "External id": 982598,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345939009664.926, "dur": 5.030, + "args": { + "External id": 982599,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939009666.862, "dur": 2.940, + "args": { + "External id": 982600,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345939009672.021, "dur": 35.300, + "args": { + "External id": 982601,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939009677.964, "dur": 5.517, + "args": { + "External id": 982602,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345939009708.750, "dur": 3.979, + "args": { + "External id": 982603,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939009711.770, "dur": 0.907, + "args": { + "External id": 982604,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345939009713.522, "dur": 16.499, + "args": { + "External id": 982605,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939009717.915, "dur": 0.546, + "args": { + "External id": 982606,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345939009818.088, "dur": 26.080, + "args": { + "External id": 982607,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345939009870.850, "dur": 27.671, + "args": { + "External id": 982608,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939009910.788, "dur": 39.183, + "args": { + "External id": 982609,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939009956.577, "dur": 42.885, + "args": { + "External id": 982610,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939010029.984, "dur": 63.663, + "args": { + "External id": 982611,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939010104.063, "dur": 35.677, + "args": { + "External id": 982612,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939010148.062, "dur": 28.354, + "args": { + "External id": 982613,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939010183.391, "dur": 29.473, + "args": { + "External id": 982614,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345939010237.589, "dur": 27.086, + "args": { + "External id": 982615,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345939010285.223, "dur": 25.040, + "args": { + "External id": 982616,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345939010328.426, "dur": 18.677, + "args": { + "External id": 982617,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345939010364.825, "dur": 13.438, + "args": { + "External id": 982618,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345939010396.424, "dur": 15.385, + "args": { + "External id": 982619,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010490.475, "dur": 17.352, + "args": { + "External id": 982620,"Record function id": 0, "Ev Idx": 5211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010493.617, "dur": 13.151, + "args": { + "External id": 982621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010498.187, "dur": 7.595, + "args": { + "External id": 982622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010499.979, "dur": 5.692, + "args": { + "External id": 982623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010511.759, "dur": 5.245, + "args": { + "External id": 982624,"Record function id": 0, "Ev Idx": 5215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010513.576, "dur": 2.974, + "args": { + "External id": 982625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010514.281, "dur": 1.703, + "args": { + "External id": 982626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010515.054, "dur": 0.851, + "args": { + "External id": 982627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010520.773, "dur": 7.424, + "args": { + "External id": 982628,"Record function id": 0, "Ev Idx": 5219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010522.450, "dur": 5.274, + "args": { + "External id": 982629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010523.151, "dur": 3.890, + "args": { + "External id": 982630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010524.102, "dur": 2.834, + "args": { + "External id": 982631,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010531.486, "dur": 4.236, + "args": { + "External id": 982632,"Record function id": 0, "Ev Idx": 5223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010533.093, "dur": 2.189, + "args": { + "External id": 982633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010533.582, "dur": 1.258, + "args": { + "External id": 982634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010534.146, "dur": 0.620, + "args": { + "External id": 982635,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010538.949, "dur": 4.137, + "args": { + "External id": 982636,"Record function id": 0, "Ev Idx": 5227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010540.144, "dur": 2.472, + "args": { + "External id": 982637,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010540.596, "dur": 1.480, + "args": { + "External id": 982638,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010541.218, "dur": 0.788, + "args": { + "External id": 982639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010546.262, "dur": 4.822, + "args": { + "External id": 982640,"Record function id": 0, "Ev Idx": 5231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010547.599, "dur": 3.060, + "args": { + "External id": 982641,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010548.464, "dur": 1.430, + "args": { + "External id": 982642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010549.213, "dur": 0.607, + "args": { + "External id": 982643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010554.482, "dur": 4.369, + "args": { + "External id": 982644,"Record function id": 0, "Ev Idx": 5235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010556.130, "dur": 2.317, + "args": { + "External id": 982645,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010556.660, "dur": 1.184, + "args": { + "External id": 982646,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010557.133, "dur": 0.645, + "args": { + "External id": 982647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010561.910, "dur": 3.792, + "args": { + "External id": 982648,"Record function id": 0, "Ev Idx": 5239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010563.252, "dur": 2.006, + "args": { + "External id": 982649,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010563.762, "dur": 0.918, + "args": { + "External id": 982650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010564.038, "dur": 0.571, + "args": { + "External id": 982651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010569.362, "dur": 6.376, + "args": { + "External id": 982652,"Record function id": 0, "Ev Idx": 5243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939010571.026, "dur": 4.284, + "args": { + "External id": 982653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010571.577, "dur": 3.139, + "args": { + "External id": 982654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939010574.005, "dur": 0.645, + "args": { + "External id": 982655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939010580.898, "dur": 62378.900, + "args": { + "External id": 982656,"Record function id": 0, "Sequence number": 10552249, "Fwd thread id": 1, "Ev Idx": 5247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939010582.804, "dur": 62366.710, + "args": { + "External id": 982657,"Sequence number": 10552249, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5248 + } + }, + { + "ph": "f", "id": 217, "pid": 2338706, "tid": 2379422, "ts": 6345939010582.804, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 2338706, "tid": 2379422, + "ts": 6345939010612.675, "dur": 39.082, + "args": { + "External id": 982658,"Record function id": 0, "Ev Idx": 5249 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 2338706, "tid": 2379422, + "ts": 6345939010659.860, "dur": 66.949, + "args": { + "External id": 982659,"Record function id": 0, "Ev Idx": 5250 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 2338706, "tid": 2379422, + "ts": 6345939010733.182, "dur": 62207.917, + "args": { + "External id": 982660,"Record function id": 0, "Ev Idx": 5251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939010828.229, "dur": 10.999, + "args": { + "External id": 982661,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939010851.657, "dur": 6.914, + "args": { + "External id": 982662,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345939010873.389, "dur": 61216.753, + "args": { + "External id": 982663,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345939010887.765, "dur": 61187.280, + "args": { + "External id": 982664,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939011028.403, "dur": 22.446, + "args": { + "External id": 982665,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345939011109.922, "dur": 60880.743, + "args": { + "External id": 982666,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939011113.239, "dur": 60876.270, + "args": { + "External id": 982667,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939011118.399, "dur": 16.547, + "args": { + "External id": 982668,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345939011137.425, "dur": 60847.053, + "args": { + "External id": 982669,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939072203.620, "dur": 13.870, + "args": { + "External id": 982670,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939072207.569, "dur": 9.408, + "args": { + "External id": 982671,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345939072247.740, "dur": 393.009, + "args": { + "External id": 982672,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939072283.004, "dur": 352.563, + "args": { + "External id": 982673,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5264, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345939072296.433, "dur": 333.580, + "args": { + "External id": 982674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939072663.714, "dur": 2.440, + "args": { + "External id": 982675,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5266, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939072724.270, "dur": 7.229, + "args": { + "External id": 982676,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939072777.405, "dur": 1.587, + "args": { + "External id": 982677,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939072797.429, "dur": 3.738, + "args": { + "External id": 982678,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939072813.953, "dur": 0.861, + "args": { + "External id": 982679,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939072829.018, "dur": 1.051, + "args": { + "External id": 982680,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939072840.889, "dur": 0.883, + "args": { + "External id": 982681,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939072852.381, "dur": 3.059, + "args": { + "External id": 982682,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939072866.304, "dur": 2.491, + "args": { + "External id": 982683,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939072879.063, "dur": 0.758, + "args": { + "External id": 982684,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939072976.022, "dur": 3120.865, + "args": { + "External id": 982685,"Record function id": 0, "Ev Idx": 5276 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 2338706, "tid": 2379422, + "ts": 6345939072998.008, "dur": 1237.079, + "args": { + "External id": 982686,"Record function id": 0, "Ev Idx": 5277 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338706, "tid": 2379422, + "ts": 6345939073034.333, "dur": 399.715, + "args": { + "External id": 982687,"Record function id": 0, "Ev Idx": 5278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939073172.205, "dur": 5.356, + "args": { + "External id": 982688,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939073181.468, "dur": 1.101, + "args": { + "External id": 982689,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939073184.714, "dur": 3.288, + "args": { + "External id": 982690,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939073192.704, "dur": 0.871, + "args": { + "External id": 982691,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939073195.031, "dur": 1.101, + "args": { + "External id": 982692,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939073198.011, "dur": 0.911, + "args": { + "External id": 982693,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939073200.851, "dur": 2.099, + "args": { + "External id": 982694,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939073206.642, "dur": 0.693, + "args": { + "External id": 982695,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939073208.825, "dur": 1.186, + "args": { + "External id": 982696,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939073211.892, "dur": 0.570, + "args": { + "External id": 982697,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939073232.588, "dur": 169.097, + "args": { + "External id": 982698,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939073250.333, "dur": 146.406, + "args": { + "External id": 982699,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939073270.483, "dur": 17.866, + "args": { + "External id": 982700,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345939073298.513, "dur": 70.221, + "args": { + "External id": 982701,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939073303.638, "dur": 64.661, + "args": { + "External id": 982702,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073307.649, "dur": 5.017, + "args": { + "External id": 982703,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345939073314.507, "dur": 53.135, + "args": { + "External id": 982704,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5295 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 2338706, "tid": 2379422, + "ts": 6345939073528.461, "dur": 698.324, + "args": { + "External id": 982705,"Record function id": 0, "Ev Idx": 5296 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338706, "tid": 2379422, + "ts": 6345939073547.798, "dur": 665.438, + "args": { + "External id": 982706,"Record function id": 0, "Ev Idx": 5297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939073612.722, "dur": 5.239, + "args": { + "External id": 982707,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345939073634.590, "dur": 59.201, + "args": { + "External id": 982708,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073640.322, "dur": 1.805, + "args": { + "External id": 982709,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073669.503, "dur": 0.540, + "args": { + "External id": 982710,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073671.786, "dur": 0.448, + "args": { + "External id": 982711,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073673.845, "dur": 0.463, + "args": { + "External id": 982712,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073677.514, "dur": 0.347, + "args": { + "External id": 982713,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073679.281, "dur": 2.555, + "args": { + "External id": 982714,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073683.463, "dur": 2.047, + "args": { + "External id": 982715,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073686.941, "dur": 0.499, + "args": { + "External id": 982716,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073689.067, "dur": 0.651, + "args": { + "External id": 982717,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345939073705.379, "dur": 45.205, + "args": { + "External id": 982718,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345939073781.884, "dur": 121.278, + "args": { + "External id": 982719,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 5310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939073792.766, "dur": 3.908, + "args": { + "External id": 982720,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345939073802.031, "dur": 11.450, + "args": { + "External id": 982721,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345939073806.598, "dur": 6.368, + "args": { + "External id": 982722,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 5313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073810.816, "dur": 0.850, + "args": { + "External id": 982723,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345939073821.011, "dur": 30.059, + "args": { + "External id": 982724,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073823.411, "dur": 1.627, + "args": { + "External id": 982725,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073826.698, "dur": 0.575, + "args": { + "External id": 982726,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073828.987, "dur": 2.574, + "args": { + "External id": 982727,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073834.467, "dur": 0.361, + "args": { + "External id": 982728,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073836.621, "dur": 0.562, + "args": { + "External id": 982729,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073838.669, "dur": 0.355, + "args": { + "External id": 982730,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073841.552, "dur": 0.672, + "args": { + "External id": 982731,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073843.768, "dur": 0.338, + "args": { + "External id": 982732,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939073845.554, "dur": 1.926, + "args": { + "External id": 982733,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345939073863.551, "dur": 32.009, + "args": { + "External id": 982734,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345939073948.666, "dur": 181.303, + "args": { + "External id": 982735,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939073983.406, "dur": 142.548, + "args": { + "External id": 982736,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5327, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345939073992.853, "dur": 127.800, + "args": { + "External id": 982737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939074152.460, "dur": 2.267, + "args": { + "External id": 982738,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5329, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939074243.027, "dur": 1795.858, + "args": { + "External id": 982739,"Sequence number": 10552248, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5330 + } + }, + { + "ph": "f", "id": 218, "pid": 2338706, "tid": 2379422, "ts": 6345939074243.027, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939074362.343, "dur": 108.620, + "args": { + "External id": 982740,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345939074517.532, "dur": 43.514, + "args": { + "External id": 982741,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345939074578.993, "dur": 49.748, + "args": { + "External id": 982742,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939074641.406, "dur": 31.291, + "args": { + "External id": 982743,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939074679.119, "dur": 32.763, + "args": { + "External id": 982744,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939074718.468, "dur": 26.852, + "args": { + "External id": 982745,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939074752.501, "dur": 28.080, + "args": { + "External id": 982746,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345939074808.979, "dur": 23.954, + "args": { + "External id": 982747,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345939074852.907, "dur": 28.371, + "args": { + "External id": 982748,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345939074906.419, "dur": 18.744, + "args": { + "External id": 982749,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345939074940.958, "dur": 13.917, + "args": { + "External id": 982750,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939074962.106, "dur": 37.846, + "args": { + "External id": 982751,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939075003.152, "dur": 91.296, + "args": { + "External id": 982752,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345939075153.333, "dur": 258.574, + "args": { + "External id": 982753,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939075237.068, "dur": 7.284, + "args": { + "External id": 982754,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939075246.627, "dur": 2.652, + "args": { + "External id": 982755,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939075250.965, "dur": 3.347, + "args": { + "External id": 982756,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939075255.775, "dur": 2.011, + "args": { + "External id": 982757,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345939075302.167, "dur": 6.736, + "args": { + "External id": 982758,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939075305.812, "dur": 2.883, + "args": { + "External id": 982759,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345939075310.925, "dur": 33.151, + "args": { + "External id": 982760,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939075316.495, "dur": 3.641, + "args": { + "External id": 982761,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345939075345.339, "dur": 4.438, + "args": { + "External id": 982762,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939075348.943, "dur": 0.754, + "args": { + "External id": 982763,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345939075351.030, "dur": 16.512, + "args": { + "External id": 982764,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939075354.929, "dur": 0.459, + "args": { + "External id": 982765,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345939075457.916, "dur": 27.469, + "args": { + "External id": 982766,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345939075504.687, "dur": 16.972, + "args": { + "External id": 982767,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939075529.912, "dur": 47.136, + "args": { + "External id": 982768,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939075584.936, "dur": 37.279, + "args": { + "External id": 982769,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939075632.202, "dur": 34.946, + "args": { + "External id": 982770,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939075680.163, "dur": 36.843, + "args": { + "External id": 982771,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939075724.376, "dur": 31.347, + "args": { + "External id": 982772,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939075763.989, "dur": 32.715, + "args": { + "External id": 982773,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345939075820.004, "dur": 28.452, + "args": { + "External id": 982774,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345939075868.379, "dur": 23.967, + "args": { + "External id": 982775,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345939075910.672, "dur": 18.038, + "args": { + "External id": 982776,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345939075948.305, "dur": 13.283, + "args": { + "External id": 982777,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345939075975.132, "dur": 16.245, + "args": { + "External id": 982778,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076121.905, "dur": 16.829, + "args": { + "External id": 982779,"Record function id": 0, "Ev Idx": 5370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076125.223, "dur": 12.441, + "args": { + "External id": 982780,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076129.919, "dur": 6.133, + "args": { + "External id": 982781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076131.657, "dur": 4.292, + "args": { + "External id": 982782,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076142.918, "dur": 5.336, + "args": { + "External id": 982783,"Record function id": 0, "Ev Idx": 5374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076144.559, "dur": 3.223, + "args": { + "External id": 982784,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076145.618, "dur": 1.517, + "args": { + "External id": 982785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076146.204, "dur": 0.840, + "args": { + "External id": 982786,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076151.452, "dur": 7.419, + "args": { + "External id": 982787,"Record function id": 0, "Ev Idx": 5378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076153.152, "dur": 5.259, + "args": { + "External id": 982788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076154.270, "dur": 3.682, + "args": { + "External id": 982789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076154.757, "dur": 3.109, + "args": { + "External id": 982790,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076162.107, "dur": 4.104, + "args": { + "External id": 982791,"Record function id": 0, "Ev Idx": 5382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076163.439, "dur": 2.323, + "args": { + "External id": 982792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076164.128, "dur": 1.204, + "args": { + "External id": 982793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076164.424, "dur": 0.844, + "args": { + "External id": 982794,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076169.426, "dur": 5.089, + "args": { + "External id": 982795,"Record function id": 0, "Ev Idx": 5386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076170.948, "dur": 3.136, + "args": { + "External id": 982796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076172.091, "dur": 1.460, + "args": { + "External id": 982797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076172.562, "dur": 0.914, + "args": { + "External id": 982798,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076177.810, "dur": 5.903, + "args": { + "External id": 982799,"Record function id": 0, "Ev Idx": 5390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076179.026, "dur": 4.118, + "args": { + "External id": 982800,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076179.551, "dur": 2.975, + "args": { + "External id": 982801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076181.738, "dur": 0.663, + "args": { + "External id": 982802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076187.492, "dur": 4.176, + "args": { + "External id": 982803,"Record function id": 0, "Ev Idx": 5394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076188.924, "dur": 2.291, + "args": { + "External id": 982804,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076189.621, "dur": 1.105, + "args": { + "External id": 982805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076189.944, "dur": 0.695, + "args": { + "External id": 982806,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076194.824, "dur": 4.607, + "args": { + "External id": 982807,"Record function id": 0, "Ev Idx": 5398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076196.324, "dur": 2.664, + "args": { + "External id": 982808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076197.164, "dur": 1.419, + "args": { + "External id": 982809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076197.851, "dur": 0.591, + "args": { + "External id": 982810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076202.602, "dur": 4.266, + "args": { + "External id": 982811,"Record function id": 0, "Ev Idx": 5402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939076203.776, "dur": 2.670, + "args": { + "External id": 982812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076204.817, "dur": 1.162, + "args": { + "External id": 982813,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939076205.200, "dur": 0.705, + "args": { + "External id": 982814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939076211.311, "dur": 61635.248, + "args": { + "External id": 982815,"Record function id": 0, "Sequence number": 10552247, "Fwd thread id": 1, "Ev Idx": 5406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939076213.723, "dur": 61624.081, + "args": { + "External id": 982816,"Sequence number": 10552247, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5407 + } + }, + { + "ph": "f", "id": 219, "pid": 2338706, "tid": 2379422, "ts": 6345939076213.723, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 2338706, "tid": 2379422, + "ts": 6345939076248.792, "dur": 44.559, + "args": { + "External id": 982817,"Record function id": 0, "Ev Idx": 5408 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 2338706, "tid": 2379422, + "ts": 6345939076301.840, "dur": 73.718, + "args": { + "External id": 982818,"Record function id": 0, "Ev Idx": 5409 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 2338706, "tid": 2379422, + "ts": 6345939076382.135, "dur": 61447.144, + "args": { + "External id": 982819,"Record function id": 0, "Ev Idx": 5410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939076475.451, "dur": 7.496, + "args": { + "External id": 982820,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939076492.622, "dur": 6.606, + "args": { + "External id": 982821,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345939076514.125, "dur": 60410.864, + "args": { + "External id": 982822,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345939076530.744, "dur": 60379.904, + "args": { + "External id": 982823,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939076620.855, "dur": 21.417, + "args": { + "External id": 982824,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345939076661.782, "dur": 60200.724, + "args": { + "External id": 982825,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939076664.963, "dur": 60196.452, + "args": { + "External id": 982826,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939076669.906, "dur": 9.146, + "args": { + "External id": 982827,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345939076681.347, "dur": 60174.668, + "args": { + "External id": 982828,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939137052.058, "dur": 41.753, + "args": { + "External id": 982829,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939137083.601, "dur": 9.458, + "args": { + "External id": 982830,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345939137124.972, "dur": 395.570, + "args": { + "External id": 982831,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939137160.257, "dur": 355.392, + "args": { + "External id": 982832,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5423, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345939137174.513, "dur": 335.665, + "args": { + "External id": 982833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939137544.746, "dur": 2.491, + "args": { + "External id": 982834,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5425, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939137611.697, "dur": 6.987, + "args": { + "External id": 982835,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939137665.593, "dur": 1.445, + "args": { + "External id": 982836,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939137682.553, "dur": 3.635, + "args": { + "External id": 982837,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939137699.016, "dur": 0.935, + "args": { + "External id": 982838,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939137715.192, "dur": 0.679, + "args": { + "External id": 982839,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939137726.813, "dur": 0.702, + "args": { + "External id": 982840,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939137738.304, "dur": 3.127, + "args": { + "External id": 982841,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939137752.171, "dur": 2.385, + "args": { + "External id": 982842,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939137764.581, "dur": 0.734, + "args": { + "External id": 982843,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939137861.036, "dur": 3135.905, + "args": { + "External id": 982844,"Record function id": 0, "Ev Idx": 5435 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 2338706, "tid": 2379422, + "ts": 6345939137883.184, "dur": 1262.669, + "args": { + "External id": 982845,"Record function id": 0, "Ev Idx": 5436 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338706, "tid": 2379422, + "ts": 6345939137899.213, "dur": 461.964, + "args": { + "External id": 982846,"Record function id": 0, "Ev Idx": 5437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939137991.389, "dur": 4.175, + "args": { + "External id": 982847,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939137998.708, "dur": 0.805, + "args": { + "External id": 982848,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939138003.867, "dur": 3.030, + "args": { + "External id": 982849,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939138030.240, "dur": 1.268, + "args": { + "External id": 982850,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939138033.863, "dur": 0.706, + "args": { + "External id": 982851,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939138036.245, "dur": 0.681, + "args": { + "External id": 982852,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939138040.973, "dur": 2.154, + "args": { + "External id": 982853,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939138044.861, "dur": 0.725, + "args": { + "External id": 982854,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939138047.304, "dur": 0.754, + "args": { + "External id": 982855,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939138049.865, "dur": 0.667, + "args": { + "External id": 982856,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939138108.097, "dur": 215.165, + "args": { + "External id": 982857,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939138170.384, "dur": 147.508, + "args": { + "External id": 982858,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939138191.617, "dur": 19.127, + "args": { + "External id": 982859,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345939138214.557, "dur": 73.229, + "args": { + "External id": 982860,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939138217.334, "dur": 70.134, + "args": { + "External id": 982861,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138221.917, "dur": 8.243, + "args": { + "External id": 982862,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345939138232.136, "dur": 54.661, + "args": { + "External id": 982863,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5454 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 2338706, "tid": 2379422, + "ts": 6345939138469.597, "dur": 668.396, + "args": { + "External id": 982864,"Record function id": 0, "Ev Idx": 5455 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338706, "tid": 2379422, + "ts": 6345939138489.142, "dur": 633.829, + "args": { + "External id": 982865,"Record function id": 0, "Ev Idx": 5456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939138557.126, "dur": 5.409, + "args": { + "External id": 982866,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345939138579.384, "dur": 34.747, + "args": { + "External id": 982867,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138584.934, "dur": 1.828, + "args": { + "External id": 982868,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138589.052, "dur": 2.137, + "args": { + "External id": 982869,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138592.672, "dur": 0.395, + "args": { + "External id": 982870,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138594.481, "dur": 0.396, + "args": { + "External id": 982871,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138597.895, "dur": 0.369, + "args": { + "External id": 982872,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138599.850, "dur": 2.501, + "args": { + "External id": 982873,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138603.794, "dur": 0.361, + "args": { + "External id": 982874,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138607.494, "dur": 0.279, + "args": { + "External id": 982875,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138609.135, "dur": 0.535, + "args": { + "External id": 982876,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345939138625.408, "dur": 43.687, + "args": { + "External id": 982877,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345939138700.253, "dur": 115.974, + "args": { + "External id": 982878,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 5469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939138710.578, "dur": 3.573, + "args": { + "External id": 982879,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345939138719.311, "dur": 10.527, + "args": { + "External id": 982880,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345939138723.862, "dur": 5.519, + "args": { + "External id": 982881,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 5472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138727.548, "dur": 0.680, + "args": { + "External id": 982882,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345939138736.797, "dur": 28.945, + "args": { + "External id": 982883,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138739.479, "dur": 0.323, + "args": { + "External id": 982884,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138743.004, "dur": 0.262, + "args": { + "External id": 982885,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138744.844, "dur": 2.451, + "args": { + "External id": 982886,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138749.410, "dur": 1.527, + "args": { + "External id": 982887,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138752.234, "dur": 0.714, + "args": { + "External id": 982888,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138754.900, "dur": 0.289, + "args": { + "External id": 982889,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138757.954, "dur": 0.348, + "args": { + "External id": 982890,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138759.862, "dur": 0.250, + "args": { + "External id": 982891,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939138761.560, "dur": 0.497, + "args": { + "External id": 982892,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345939138778.403, "dur": 30.202, + "args": { + "External id": 982893,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345939138861.330, "dur": 127.637, + "args": { + "External id": 982894,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939138892.807, "dur": 93.056, + "args": { + "External id": 982895,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5486, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345939138902.130, "dur": 79.452, + "args": { + "External id": 982896,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939139025.465, "dur": 3.106, + "args": { + "External id": 982897,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5488, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939139155.405, "dur": 1815.689, + "args": { + "External id": 982898,"Sequence number": 10552246, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5489 + } + }, + { + "ph": "f", "id": 220, "pid": 2338706, "tid": 2379422, "ts": 6345939139155.405, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939139277.547, "dur": 111.632, + "args": { + "External id": 982899,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345939139434.844, "dur": 43.354, + "args": { + "External id": 982900,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345939139495.551, "dur": 51.160, + "args": { + "External id": 982901,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939139559.051, "dur": 31.038, + "args": { + "External id": 982902,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939139596.627, "dur": 33.895, + "args": { + "External id": 982903,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939139637.667, "dur": 26.871, + "args": { + "External id": 982904,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939139672.602, "dur": 29.502, + "args": { + "External id": 982905,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345939139729.456, "dur": 25.251, + "args": { + "External id": 982906,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345939139776.499, "dur": 30.143, + "args": { + "External id": 982907,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345939139829.914, "dur": 21.117, + "args": { + "External id": 982908,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345939139868.493, "dur": 15.440, + "args": { + "External id": 982909,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939139892.618, "dur": 35.855, + "args": { + "External id": 982910,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939139932.164, "dur": 32.312, + "args": { + "External id": 982911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345939140038.651, "dur": 313.703, + "args": { + "External id": 982912,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939140165.659, "dur": 8.440, + "args": { + "External id": 982913,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939140176.815, "dur": 3.328, + "args": { + "External id": 982914,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939140181.649, "dur": 1.821, + "args": { + "External id": 982915,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939140184.835, "dur": 2.191, + "args": { + "External id": 982916,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345939140235.932, "dur": 7.019, + "args": { + "External id": 982917,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939140238.722, "dur": 4.050, + "args": { + "External id": 982918,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345939140245.261, "dur": 36.208, + "args": { + "External id": 982919,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939140251.342, "dur": 3.793, + "args": { + "External id": 982920,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345939140285.429, "dur": 2.016, + "args": { + "External id": 982921,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939140286.633, "dur": 0.710, + "args": { + "External id": 982922,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345939140288.705, "dur": 18.442, + "args": { + "External id": 982923,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939140292.719, "dur": 0.586, + "args": { + "External id": 982924,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345939140397.051, "dur": 32.068, + "args": { + "External id": 982925,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345939140449.015, "dur": 34.807, + "args": { + "External id": 982926,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939140499.891, "dur": 57.254, + "args": { + "External id": 982927,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939140564.796, "dur": 42.858, + "args": { + "External id": 982928,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939140618.592, "dur": 19.611, + "args": { + "External id": 982929,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939140643.857, "dur": 32.694, + "args": { + "External id": 982930,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939140684.134, "dur": 26.478, + "args": { + "External id": 982931,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939140717.250, "dur": 31.357, + "args": { + "External id": 982932,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345939140772.551, "dur": 23.965, + "args": { + "External id": 982933,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345939140816.448, "dur": 23.639, + "args": { + "External id": 982934,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345939140858.028, "dur": 17.288, + "args": { + "External id": 982935,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345939140894.336, "dur": 13.125, + "args": { + "External id": 982936,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345939140923.342, "dur": 16.238, + "args": { + "External id": 982937,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141042.028, "dur": 54.781, + "args": { + "External id": 982938,"Record function id": 0, "Ev Idx": 5529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141046.272, "dur": 48.814, + "args": { + "External id": 982939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141050.675, "dur": 42.874, + "args": { + "External id": 982940,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141088.369, "dur": 4.813, + "args": { + "External id": 982941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141103.671, "dur": 6.662, + "args": { + "External id": 982942,"Record function id": 0, "Ev Idx": 5533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141106.076, "dur": 3.773, + "args": { + "External id": 982943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141107.652, "dur": 1.625, + "args": { + "External id": 982944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141108.410, "dur": 0.772, + "args": { + "External id": 982945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141113.788, "dur": 6.941, + "args": { + "External id": 982946,"Record function id": 0, "Ev Idx": 5537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141115.528, "dur": 4.752, + "args": { + "External id": 982947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141116.069, "dur": 3.772, + "args": { + "External id": 982948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141116.689, "dur": 3.083, + "args": { + "External id": 982949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141123.927, "dur": 5.155, + "args": { + "External id": 982950,"Record function id": 0, "Ev Idx": 5541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141125.338, "dur": 3.241, + "args": { + "External id": 982951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141126.202, "dur": 1.711, + "args": { + "External id": 982952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141126.921, "dur": 0.927, + "args": { + "External id": 982953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141132.192, "dur": 3.814, + "args": { + "External id": 982954,"Record function id": 0, "Ev Idx": 5545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141133.561, "dur": 2.009, + "args": { + "External id": 982955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141134.017, "dur": 1.101, + "args": { + "External id": 982956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141134.397, "dur": 0.655, + "args": { + "External id": 982957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141139.290, "dur": 6.617, + "args": { + "External id": 982958,"Record function id": 0, "Ev Idx": 5549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141140.729, "dur": 4.710, + "args": { + "External id": 982959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141141.299, "dur": 3.603, + "args": { + "External id": 982960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141144.212, "dur": 0.589, + "args": { + "External id": 982961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141149.105, "dur": 4.787, + "args": { + "External id": 982962,"Record function id": 0, "Ev Idx": 5553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141150.709, "dur": 2.773, + "args": { + "External id": 982963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141151.500, "dur": 1.557, + "args": { + "External id": 982964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141152.247, "dur": 0.742, + "args": { + "External id": 982965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141156.992, "dur": 3.656, + "args": { + "External id": 982966,"Record function id": 0, "Ev Idx": 5557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141158.091, "dur": 2.135, + "args": { + "External id": 982967,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141158.640, "dur": 1.087, + "args": { + "External id": 982968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141159.000, "dur": 0.662, + "args": { + "External id": 982969,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141164.109, "dur": 3.707, + "args": { + "External id": 982970,"Record function id": 0, "Ev Idx": 5561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939141165.305, "dur": 2.079, + "args": { + "External id": 982971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141165.801, "dur": 1.001, + "args": { + "External id": 982972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939141166.189, "dur": 0.548, + "args": { + "External id": 982973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939141172.534, "dur": 63044.089, + "args": { + "External id": 982974,"Record function id": 0, "Sequence number": 10552245, "Fwd thread id": 1, "Ev Idx": 5565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939141174.517, "dur": 63031.558, + "args": { + "External id": 982975,"Sequence number": 10552245, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5566 + } + }, + { + "ph": "f", "id": 221, "pid": 2338706, "tid": 2379422, "ts": 6345939141174.517, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 2338706, "tid": 2379422, + "ts": 6345939141211.169, "dur": 40.243, + "args": { + "External id": 982976,"Record function id": 0, "Ev Idx": 5567 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 2338706, "tid": 2379422, + "ts": 6345939141259.876, "dur": 74.134, + "args": { + "External id": 982977,"Record function id": 0, "Ev Idx": 5568 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 2338706, "tid": 2379422, + "ts": 6345939141340.682, "dur": 62856.780, + "args": { + "External id": 982978,"Record function id": 0, "Ev Idx": 5569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939141434.618, "dur": 7.896, + "args": { + "External id": 982979,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939141452.582, "dur": 7.469, + "args": { + "External id": 982980,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345939141474.903, "dur": 61825.441, + "args": { + "External id": 982981,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345939141489.588, "dur": 61796.091, + "args": { + "External id": 982982,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939141580.505, "dur": 18.927, + "args": { + "External id": 982983,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345939141620.772, "dur": 61618.723, + "args": { + "External id": 982984,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939141626.309, "dur": 61612.071, + "args": { + "External id": 982985,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939141632.864, "dur": 10.285, + "args": { + "External id": 982986,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345939141645.213, "dur": 61587.599, + "args": { + "External id": 982987,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939203415.950, "dur": 13.903, + "args": { + "External id": 982988,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939203420.077, "dur": 9.385, + "args": { + "External id": 982989,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345939203460.524, "dur": 366.353, + "args": { + "External id": 982990,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939203497.193, "dur": 324.645, + "args": { + "External id": 982991,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5582, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345939203510.811, "dur": 305.398, + "args": { + "External id": 982992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939203849.106, "dur": 2.125, + "args": { + "External id": 982993,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5584, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939203910.854, "dur": 6.996, + "args": { + "External id": 982994,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939203965.932, "dur": 1.453, + "args": { + "External id": 982995,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939203984.308, "dur": 3.410, + "args": { + "External id": 982996,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939203999.995, "dur": 0.879, + "args": { + "External id": 982997,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939204035.810, "dur": 1.393, + "args": { + "External id": 982998,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939204049.782, "dur": 0.854, + "args": { + "External id": 982999,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939204105.323, "dur": 3.156, + "args": { + "External id": 983000,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939204119.172, "dur": 2.367, + "args": { + "External id": 983001,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939204132.044, "dur": 0.849, + "args": { + "External id": 983002,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939204233.778, "dur": 2443.231, + "args": { + "External id": 983003,"Record function id": 0, "Ev Idx": 5594 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 2338706, "tid": 2379422, + "ts": 6345939204255.298, "dur": 469.204, + "args": { + "External id": 983004,"Record function id": 0, "Ev Idx": 5595 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338706, "tid": 2379422, + "ts": 6345939204269.473, "dur": 352.846, + "args": { + "External id": 983005,"Record function id": 0, "Ev Idx": 5596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939204368.257, "dur": 4.671, + "args": { + "External id": 983006,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939204376.008, "dur": 1.247, + "args": { + "External id": 983007,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939204379.293, "dur": 3.284, + "args": { + "External id": 983008,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939204386.382, "dur": 0.899, + "args": { + "External id": 983009,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939204389.257, "dur": 1.125, + "args": { + "External id": 983010,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939204392.198, "dur": 0.885, + "args": { + "External id": 983011,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939204394.885, "dur": 2.144, + "args": { + "External id": 983012,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939204400.768, "dur": 1.118, + "args": { + "External id": 983013,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939204404.017, "dur": 1.307, + "args": { + "External id": 983014,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939204407.562, "dur": 0.854, + "args": { + "External id": 983015,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939204427.682, "dur": 163.423, + "args": { + "External id": 983016,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939204445.503, "dur": 140.433, + "args": { + "External id": 983017,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939204466.793, "dur": 18.976, + "args": { + "External id": 983018,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345939204489.504, "dur": 67.870, + "args": { + "External id": 983019,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939204493.724, "dur": 63.277, + "args": { + "External id": 983020,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939204497.963, "dur": 6.606, + "args": { + "External id": 983021,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345939204506.299, "dur": 50.029, + "args": { + "External id": 983022,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939204732.539, "dur": 1918.366, + "args": { + "External id": 983023,"Sequence number": 10552244, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5614 + } + }, + { + "ph": "f", "id": 222, "pid": 2338706, "tid": 2379422, "ts": 6345939204732.539, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939204842.730, "dur": 105.264, + "args": { + "External id": 983024,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345939204993.802, "dur": 102.606, + "args": { + "External id": 983025,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345939205120.326, "dur": 58.625, + "args": { + "External id": 983026,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939205189.650, "dur": 33.374, + "args": { + "External id": 983027,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939205229.790, "dur": 32.609, + "args": { + "External id": 983028,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939205270.946, "dur": 28.039, + "args": { + "External id": 983029,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939205308.362, "dur": 28.676, + "args": { + "External id": 983030,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345939205364.965, "dur": 27.912, + "args": { + "External id": 983031,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345939205415.471, "dur": 28.297, + "args": { + "External id": 983032,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345939205468.628, "dur": 18.766, + "args": { + "External id": 983033,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345939205503.199, "dur": 13.856, + "args": { + "External id": 983034,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939205527.302, "dur": 34.842, + "args": { + "External id": 983035,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939205565.898, "dur": 31.413, + "args": { + "External id": 983036,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345939205628.750, "dur": 348.476, + "args": { + "External id": 983037,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939205710.264, "dur": 8.380, + "args": { + "External id": 983038,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939205720.943, "dur": 2.622, + "args": { + "External id": 983039,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939205725.480, "dur": 4.387, + "args": { + "External id": 983040,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939205809.777, "dur": 11.868, + "args": { + "External id": 983041,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345939205866.590, "dur": 4.900, + "args": { + "External id": 983042,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939205868.508, "dur": 2.813, + "args": { + "External id": 983043,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345939205873.251, "dur": 32.411, + "args": { + "External id": 983044,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939205879.120, "dur": 2.939, + "args": { + "External id": 983045,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345939205909.222, "dur": 1.522, + "args": { + "External id": 983046,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939205910.152, "dur": 0.492, + "args": { + "External id": 983047,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345939205911.593, "dur": 16.458, + "args": { + "External id": 983048,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939205915.633, "dur": 0.454, + "args": { + "External id": 983049,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345939206040.011, "dur": 65.724, + "args": { + "External id": 983050,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345939206131.056, "dur": 16.782, + "args": { + "External id": 983051,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939206167.254, "dur": 54.374, + "args": { + "External id": 983052,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939206230.119, "dur": 40.522, + "args": { + "External id": 983053,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939206280.701, "dur": 20.540, + "args": { + "External id": 983054,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939206307.008, "dur": 30.404, + "args": { + "External id": 983055,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939206343.777, "dur": 27.267, + "args": { + "External id": 983056,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345939206379.438, "dur": 34.824, + "args": { + "External id": 983057,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345939206441.611, "dur": 28.398, + "args": { + "External id": 983058,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345939206490.518, "dur": 25.400, + "args": { + "External id": 983059,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345939206532.489, "dur": 18.060, + "args": { + "External id": 983060,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345939206569.549, "dur": 15.970, + "args": { + "External id": 983061,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345939206600.884, "dur": 18.636, + "args": { + "External id": 983062,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206700.760, "dur": 15.472, + "args": { + "External id": 983063,"Record function id": 0, "Ev Idx": 5654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206704.235, "dur": 10.905, + "args": { + "External id": 983064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206708.756, "dur": 5.409, + "args": { + "External id": 983065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206710.205, "dur": 3.864, + "args": { + "External id": 983066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206720.224, "dur": 7.025, + "args": { + "External id": 983067,"Record function id": 0, "Ev Idx": 5658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206721.894, "dur": 4.896, + "args": { + "External id": 983068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206723.019, "dur": 3.181, + "args": { + "External id": 983069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206723.488, "dur": 2.608, + "args": { + "External id": 983070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206730.627, "dur": 4.306, + "args": { + "External id": 983071,"Record function id": 0, "Ev Idx": 5662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206732.258, "dur": 2.233, + "args": { + "External id": 983072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206732.909, "dur": 1.181, + "args": { + "External id": 983073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206733.376, "dur": 0.642, + "args": { + "External id": 983074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206738.096, "dur": 4.883, + "args": { + "External id": 983075,"Record function id": 0, "Ev Idx": 5666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206739.802, "dur": 2.745, + "args": { + "External id": 983076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206740.807, "dur": 1.316, + "args": { + "External id": 983077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206741.243, "dur": 0.812, + "args": { + "External id": 983078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206746.116, "dur": 3.891, + "args": { + "External id": 983079,"Record function id": 0, "Ev Idx": 5670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206747.720, "dur": 1.897, + "args": { + "External id": 983080,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206748.187, "dur": 1.001, + "args": { + "External id": 983081,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206748.478, "dur": 0.636, + "args": { + "External id": 983082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206753.112, "dur": 6.673, + "args": { + "External id": 983083,"Record function id": 0, "Ev Idx": 5674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206754.587, "dur": 4.771, + "args": { + "External id": 983084,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206755.123, "dur": 3.389, + "args": { + "External id": 983085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206757.775, "dur": 0.629, + "args": { + "External id": 983086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206763.204, "dur": 4.291, + "args": { + "External id": 983087,"Record function id": 0, "Ev Idx": 5678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206764.645, "dur": 2.452, + "args": { + "External id": 983088,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206765.298, "dur": 1.291, + "args": { + "External id": 983089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206765.741, "dur": 0.782, + "args": { + "External id": 983090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206770.995, "dur": 4.670, + "args": { + "External id": 983091,"Record function id": 0, "Ev Idx": 5682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206773.000, "dur": 2.258, + "args": { + "External id": 983092,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206773.743, "dur": 1.060, + "args": { + "External id": 983093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206774.192, "dur": 0.537, + "args": { + "External id": 983094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206778.963, "dur": 6.749, + "args": { + "External id": 983095,"Record function id": 0, "Ev Idx": 5686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939206780.291, "dur": 4.907, + "args": { + "External id": 983096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206781.491, "dur": 3.276, + "args": { + "External id": 983097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939206781.814, "dur": 2.885, + "args": { + "External id": 983098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939206790.436, "dur": 65251.608, + "args": { + "External id": 983099,"Record function id": 0, "Sequence number": 10552243, "Fwd thread id": 1, "Ev Idx": 5690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939206792.351, "dur": 65239.398, + "args": { + "External id": 983100,"Sequence number": 10552243, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5691 + } + }, + { + "ph": "f", "id": 223, "pid": 2338706, "tid": 2379422, "ts": 6345939206792.351, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 2338706, "tid": 2379422, + "ts": 6345939206825.068, "dur": 41.098, + "args": { + "External id": 983101,"Record function id": 0, "Ev Idx": 5692 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 2338706, "tid": 2379422, + "ts": 6345939206874.227, "dur": 95.503, + "args": { + "External id": 983102,"Record function id": 0, "Ev Idx": 5693 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 2338706, "tid": 2379422, + "ts": 6345939206979.313, "dur": 65024.907, + "args": { + "External id": 983103,"Record function id": 0, "Ev Idx": 5694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939207138.836, "dur": 9.678, + "args": { + "External id": 983104,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939207160.566, "dur": 5.470, + "args": { + "External id": 983105,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345939207185.571, "dur": 63990.339, + "args": { + "External id": 983106,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345939207200.138, "dur": 63961.885, + "args": { + "External id": 983107,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939207317.340, "dur": 21.043, + "args": { + "External id": 983108,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345939207357.907, "dur": 63755.046, + "args": { + "External id": 983109,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939207363.754, "dur": 63748.117, + "args": { + "External id": 983110,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939207368.425, "dur": 10.875, + "args": { + "External id": 983111,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345939207381.409, "dur": 63725.557, + "args": { + "External id": 983112,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939271289.665, "dur": 11.476, + "args": { + "External id": 983113,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939271293.101, "dur": 7.709, + "args": { + "External id": 983114,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345939271334.298, "dur": 374.282, + "args": { + "External id": 983115,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939271367.749, "dur": 335.801, + "args": { + "External id": 983116,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5707, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345939271382.121, "dur": 316.280, + "args": { + "External id": 983117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939271732.441, "dur": 2.205, + "args": { + "External id": 983118,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5709, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939271790.770, "dur": 6.640, + "args": { + "External id": 983119,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939271842.481, "dur": 3.514, + "args": { + "External id": 983120,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939271862.114, "dur": 1.246, + "args": { + "External id": 983121,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939271876.368, "dur": 0.847, + "args": { + "External id": 983122,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939271889.502, "dur": 1.000, + "args": { + "External id": 983123,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939271901.684, "dur": 2.707, + "args": { + "External id": 983124,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939271914.668, "dur": 0.676, + "args": { + "External id": 983125,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939271928.690, "dur": 1.957, + "args": { + "External id": 983126,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939271941.704, "dur": 0.605, + "args": { + "External id": 983127,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939272092.099, "dur": 304.683, + "args": { + "External id": 983128,"Record function id": 0, "Sequence number": 10552242, "Fwd thread id": 1, "Ev Idx": 5719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345939272096.053, "dur": 290.711, + "args": { + "External id": 983129,"Sequence number": 10552242, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5720 + } + }, + { + "ph": "f", "id": 224, "pid": 2338706, "tid": 2379422, "ts": 6345939272096.053, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 2338706, "tid": 2379422, + "ts": 6345939272228.077, "dur": 53.750, + "args": { + "External id": 983130,"kernel_hash": "cwi477k6wdh6lokdagawsji5sufpvs7jjfv77xwkpv6wyl2co2tv", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/wi/cwi477k6wdh6lokdagawsji5sufpvs7jjfv77xwkpv6wyl2co2tv.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 5721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 2338706, "tid": 2379422, + "ts": 6345939272301.852, "dur": 26.132, + "args": { + "External id": 983131,"kernel_hash": "cdaibgaofwvuhdtnzghakbdinq55v26czvh7dtqt726q3xahddo3", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/da/cdaibgaofwvuhdtnzghakbdinq55v26czvh7dtqt726q3xahddo3.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096, 4096], [32000, 4096], []], "Ev Idx": 5722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 2338706, "tid": 2379422, + "ts": 6345939272349.344, "dur": 21.239, + "args": { + "External id": 983132,"kernel_hash": "cozpxeylo6qxtrc76ucyd7ba7bcr4ctg436exqn3bklybds2girm", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/oz/cozpxeylo6qxtrc76ucyd7ba7bcr4ctg436exqn3bklybds2girm.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 5723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939272407.850, "dur": 14.727, + "args": { + "External id": 983133,"Record function id": 0, "Ev Idx": 5724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345939272411.567, "dur": 10.003, + "args": { + "External id": 983134,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 5725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939272415.101, "dur": 5.541, + "args": { + "External id": 983135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 5726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345939272416.446, "dur": 4.056, + "args": { + "External id": 983136,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 5727 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 2338706, "tid": 2379422, + "ts": 6345939272446.784, "dur": 17909.860, + "args": { + "External id": 983137,"Record function id": 0, "Ev Idx": 5728 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 2338706, "tid": 2379422, + "ts": 6345939272466.423, "dur": 42.978, + "args": { + "External id": 983138,"Record function id": 0, "Ev Idx": 5729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 2338706, "tid": 2379422, + "ts": 6345939272515.846, "dur": 295.172, + "args": { + "External id": 983139,"Record function id": 0, "Ev Idx": 5730 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 2338706, "tid": 2379422, + "ts": 6345939272817.231, "dur": 17274.163, + "args": { + "External id": 983140,"Record function id": 0, "Ev Idx": 5731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939272942.202, "dur": 7.269, + "args": { + "External id": 983141,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345939272959.660, "dur": 5.057, + "args": { + "External id": 983142,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 5733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345939272989.865, "dur": 15730.688, + "args": { + "External id": 983143,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 5734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345939273024.028, "dur": 15680.192, + "args": { + "External id": 983144,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 5735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939273878.359, "dur": 25.255, + "args": { + "External id": 983145,"Record function id": 0, "Concrete Inputs": ["[277237]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345939274196.255, "dur": 14453.216, + "args": { + "External id": 983146,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], [], []], "Ev Idx": 5737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345939274200.156, "dur": 14447.982, + "args": { + "External id": 983147,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], []], "Ev Idx": 5738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939274208.265, "dur": 18.774, + "args": { + "External id": 983148,"Record function id": 0, "Concrete Inputs": ["[277237]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345939274230.174, "dur": 14410.992, + "args": { + "External id": 983149,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[277237], [277237], []], "Ev Idx": 5740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939288874.950, "dur": 12.261, + "args": { + "External id": 983150,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1134596096], [], [], [], [], []], "Ev Idx": 5741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345939288878.651, "dur": 8.196, + "args": { + "External id": 983151,"Record function id": 0, "Concrete Inputs": ["[141824512]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345939288918.585, "dur": 418.575, + "args": { + "External id": 983152,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[141824512], [1134596096], [], [], [], []], "Ev Idx": 5743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939288950.489, "dur": 380.780, + "args": { + "External id": 983153,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 141824512, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1134596096], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5744, "In msg nelems": 1134596096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345939288963.232, "dur": 361.383, + "args": { + "External id": 983154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1134596096]], "Ev Idx": 5745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345939289363.229, "dur": 2.908, + "args": { + "External id": 983155,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5746, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289428.992, "dur": 9.144, + "args": { + "External id": 983156,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289483.946, "dur": 1.121, + "args": { + "External id": 983157,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289501.118, "dur": 1.267, + "args": { + "External id": 983158,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "16384512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289515.595, "dur": 0.888, + "args": { + "External id": 983159,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "18481664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289529.051, "dur": 2.828, + "args": { + "External id": 983160,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "19005952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289545.026, "dur": 0.668, + "args": { + "External id": 983161,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "19530240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289556.776, "dur": 2.040, + "args": { + "External id": 983162,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289570.237, "dur": 0.939, + "args": { + "External id": 983163,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "21627904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289581.735, "dur": 2.808, + "args": { + "External id": 983164,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "28967936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289593.991, "dur": 0.866, + "args": { + "External id": 983165,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "36307968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289605.811, "dur": 0.951, + "args": { + "External id": 983166,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289619.029, "dur": 0.704, + "args": { + "External id": 983167,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "43648512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289630.844, "dur": 3.066, + "args": { + "External id": 983168,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "45745664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289644.536, "dur": 0.748, + "args": { + "External id": 983169,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "46269952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289655.387, "dur": 1.120, + "args": { + "External id": 983170,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "46794240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289668.733, "dur": 0.516, + "args": { + "External id": 983171,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289682.647, "dur": 2.689, + "args": { + "External id": 983172,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "48891904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289699.709, "dur": 1.296, + "args": { + "External id": 983173,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "56231936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289711.534, "dur": 1.335, + "args": { + "External id": 983174,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "63571968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289723.353, "dur": 0.972, + "args": { + "External id": 983175,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289735.877, "dur": 4.099, + "args": { + "External id": 983176,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "70912512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289753.420, "dur": 1.034, + "args": { + "External id": 983177,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73009664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289765.327, "dur": 0.907, + "args": { + "External id": 983178,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73533952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289787.070, "dur": 1.024, + "args": { + "External id": 983179,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "74058240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289799.902, "dur": 3.169, + "args": { + "External id": 983180,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289812.706, "dur": 0.878, + "args": { + "External id": 983181,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "76155904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289826.055, "dur": 1.170, + "args": { + "External id": 983182,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "83495936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289838.349, "dur": 0.964, + "args": { + "External id": 983183,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "90835968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289850.244, "dur": 2.657, + "args": { + "External id": 983184,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289863.351, "dur": 0.918, + "args": { + "External id": 983185,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "98176512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289875.370, "dur": 0.599, + "args": { + "External id": 983186,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100273664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289888.702, "dur": 0.872, + "args": { + "External id": 983187,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100797952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289900.342, "dur": 2.851, + "args": { + "External id": 983188,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "101322240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289914.383, "dur": 0.641, + "args": { + "External id": 983189,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289925.426, "dur": 2.094, + "args": { + "External id": 983190,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "103419904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289937.531, "dur": 1.166, + "args": { + "External id": 983191,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "110759936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289951.731, "dur": 3.122, + "args": { + "External id": 983192,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "118099968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289964.996, "dur": 0.984, + "args": { + "External id": 983193,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345939289975.680, "dur": 1.153, + "args": { + "External id": 983194,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "125440512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940431684.083, "dur": 111.894, + "args": { + "External id": 983195,"Record function id": 0, "Sequence number": 10552697, "Fwd thread id": 1, "Ev Idx": 5786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940431693.096, "dur": 92.972, + "args": { + "External id": 983196,"Sequence number": 10552697, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5787 + } + }, + { + "ph": "f", "id": 225, "pid": 2338706, "tid": 2379422, "ts": 6345940431693.096, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338706, "tid": 2379422, + "ts": 6345940431702.102, "dur": 82.267, + "args": { + "External id": 983197,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940431804.220, "dur": 323.468, + "args": { + "External id": 983198,"Record function id": 0, "Ev Idx": 5789 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940431882.112, "dur": 102.496, + "args": { + "External id": 983199,"Record function id": 0, "Ev Idx": 5790 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.25", "pid": 2338706, "tid": 2379422, + "ts": 6345940431914.777, "dur": 57.706, + "args": { + "External id": 983200,"Record function id": 0, "Ev Idx": 5791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940431990.170, "dur": 1.927, + "args": { + "External id": 983201,"Sequence number": 10552696, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5792 + } + }, + { + "ph": "f", "id": 226, "pid": 2338706, "tid": 2379422, "ts": 6345940431990.170, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940431997.039, "dur": 122.642, + "args": { + "External id": 983202,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940432002.325, "dur": 116.188, + "args": { + "External id": 983203,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940432029.429, "dur": 4.678, + "args": { + "External id": 983204,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940432141.444, "dur": 38133.549, + "args": { + "External id": 983205,"Record function id": 0, "Sequence number": 10552694, "Fwd thread id": 1, "Ev Idx": 5796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940432144.114, "dur": 38114.674, + "args": { + "External id": 983206,"Sequence number": 10552694, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5797 + } + }, + { + "ph": "f", "id": 227, "pid": 2338706, "tid": 2379422, "ts": 6345940432144.114, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940432193.042, "dur": 4.180, + "args": { + "External id": 983207,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940432200.542, "dur": 37700.057, + "args": { + "External id": 983208,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940432202.407, "dur": 37697.778, + "args": { + "External id": 983209,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940432206.252, "dur": 7.192, + "args": { + "External id": 983210,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940432215.377, "dur": 37682.877, + "args": { + "External id": 983211,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338706, "tid": 2379422, + "ts": 6345940469906.993, "dur": 0.681, + "args": { + "External id": 983212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338706, "tid": 2379422, + "ts": 6345940469910.112, "dur": 5.393, + "args": { + "External id": 983213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338706, "tid": 2379422, + "ts": 6345940469913.994, "dur": 1.269, + "args": { + "External id": 983214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2379422, + "ts": 6345940469922.419, "dur": 32.644, + "args": { + "External id": 983215,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338706, "tid": 2379422, + "ts": 6345940469964.209, "dur": 61.178, + "args": { + "External id": 983216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2379422, + "ts": 6345940469966.410, "dur": 58.780, + "args": { + "External id": 983217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2379422, + "ts": 6345940469968.210, "dur": 56.343, + "args": { + "External id": 983218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940470291.668, "dur": 22.235, + "args": { + "External id": 983219,"Record function id": 0, "Sequence number": 10552693, "Fwd thread id": 1, "Ev Idx": 5810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940470294.207, "dur": 15.627, + "args": { + "External id": 983220,"Sequence number": 10552693, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 5811 + } + }, + { + "ph": "f", "id": 228, "pid": 2338706, "tid": 2379422, "ts": 6345940470294.207, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940470300.623, "dur": 8.949, + "args": { + "External id": 983221,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940470303.087, "dur": 6.300, + "args": { + "External id": 983222,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940470318.074, "dur": 119.621, + "args": { + "External id": 983223,"Record function id": 0, "Sequence number": 10552692, "Fwd thread id": 1, "Ev Idx": 5814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940470319.253, "dur": 109.289, + "args": { + "External id": 983224,"Sequence number": 10552692, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5815 + } + }, + { + "ph": "f", "id": 229, "pid": 2338706, "tid": 2379422, "ts": 6345940470319.253, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940470324.152, "dur": 103.831, + "args": { + "External id": 983225,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 5816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345940470333.268, "dur": 39.226, + "args": { + "External id": 983226,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940470336.629, "dur": 9.465, + "args": { + "External id": 983227,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345940470348.234, "dur": 23.734, + "args": { + "External id": 983228,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345940470351.407, "dur": 20.128, + "args": { + "External id": 983229,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 5820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345940470375.081, "dur": 6.154, + "args": { + "External id": 983230,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 5821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940470379.046, "dur": 1.795, + "args": { + "External id": 983231,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940470382.708, "dur": 44.179, + "args": { + "External id": 983232,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940470442.820, "dur": 63.534, + "args": { + "External id": 983233,"Record function id": 0, "Sequence number": 10552691, "Fwd thread id": 1, "Ev Idx": 5824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940470443.943, "dur": 57.788, + "args": { + "External id": 983234,"Sequence number": 10552691, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5825 + } + }, + { + "ph": "f", "id": 230, "pid": 2338706, "tid": 2379422, "ts": 6345940470443.943, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940470447.828, "dur": 53.638, + "args": { + "External id": 983235,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "3"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345940470452.001, "dur": 20.315, + "args": { + "External id": 983236,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940470453.803, "dur": 2.947, + "args": { + "External id": 983237,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345940470457.603, "dur": 14.429, + "args": { + "External id": 983238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345940470460.143, "dur": 11.512, + "args": { + "External id": 983239,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2379422, + "ts": 6345940470476.263, "dur": 7.059, + "args": { + "External id": 983240,"Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 5831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940470481.624, "dur": 1.015, + "args": { + "External id": 983241,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940470484.345, "dur": 16.490, + "args": { + "External id": 983242,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940470510.584, "dur": 225.126, + "args": { + "External id": 983243,"Record function id": 0, "Sequence number": 10552690, "Fwd thread id": 1, "Ev Idx": 5834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940470512.182, "dur": 218.730, + "args": { + "External id": 983244,"Sequence number": 10552690, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5835 + } + }, + { + "ph": "f", "id": 231, "pid": 2338706, "tid": 2379422, "ts": 6345940470512.182, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940470514.368, "dur": 216.160, + "args": { + "External id": 983245,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345940470516.536, "dur": 16.586, + "args": { + "External id": 983246,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940470518.015, "dur": 2.660, + "args": { + "External id": 983247,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345940470521.402, "dur": 11.466, + "args": { + "External id": 983248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345940470522.737, "dur": 9.789, + "args": { + "External id": 983249,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345940470536.756, "dur": 6.356, + "args": { + "External id": 983250,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940470539.587, "dur": 3.302, + "args": { + "External id": 983251,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940470544.042, "dur": 185.507, + "args": { + "External id": 983252,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940470740.247, "dur": 115.405, + "args": { + "External id": 983253,"Record function id": 0, "Sequence number": 10552689, "Fwd thread id": 1, "Ev Idx": 5844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940470742.008, "dur": 108.534, + "args": { + "External id": 983254,"Sequence number": 10552689, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5845 + } + }, + { + "ph": "f", "id": 232, "pid": 2338706, "tid": 2379422, "ts": 6345940470742.008, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940470744.736, "dur": 105.342, + "args": { + "External id": 983255,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345940470746.388, "dur": 18.878, + "args": { + "External id": 983256,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940470748.145, "dur": 2.460, + "args": { + "External id": 983257,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345940470751.319, "dur": 13.691, + "args": { + "External id": 983258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345940470752.727, "dur": 11.745, + "args": { + "External id": 983259,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345940470766.302, "dur": 3.240, + "args": { + "External id": 983260,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940470768.517, "dur": 0.760, + "args": { + "External id": 983261,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940470796.614, "dur": 52.409, + "args": { + "External id": 983262,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940470860.983, "dur": 42.214, + "args": { + "External id": 983263,"Record function id": 0, "Sequence number": 10552688, "Fwd thread id": 1, "Ev Idx": 5854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940470865.286, "dur": 0.978, + "args": { + "External id": 983264,"Sequence number": 10552688, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5855 + } + }, + { + "ph": "f", "id": 233, "pid": 2338706, "tid": 2379422, "ts": 6345940470865.286, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940470869.723, "dur": 28.196, + "args": { + "External id": 983265,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940470872.481, "dur": 24.988, + "args": { + "External id": 983266,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940470879.261, "dur": 0.825, + "args": { + "External id": 983267,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940470908.742, "dur": 2460.188, + "args": { + "External id": 983268,"Record function id": 0, "Sequence number": 10552686, "Fwd thread id": 1, "Ev Idx": 5859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940470910.550, "dur": 2416.218, + "args": { + "External id": 983269,"Sequence number": 10552686, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5860 + } + }, + { + "ph": "f", "id": 234, "pid": 2338706, "tid": 2379422, "ts": 6345940470910.550, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940470950.986, "dur": 3.577, + "args": { + "External id": 983270,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940470957.237, "dur": 2138.653, + "args": { + "External id": 983271,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940470959.028, "dur": 2136.554, + "args": { + "External id": 983272,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940470962.490, "dur": 5.641, + "args": { + "External id": 983273,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940470969.238, "dur": 2125.126, + "args": { + "External id": 983274,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338706, "tid": 2379422, + "ts": 6345940473100.325, "dur": 0.473, + "args": { + "External id": 983275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473102.481, "dur": 2.592, + "args": { + "External id": 983276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473103.998, "dur": 0.935, + "args": { + "External id": 983277,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2379422, + "ts": 6345940473109.809, "dur": 24.514, + "args": { + "External id": 983278,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338706, "tid": 2379422, + "ts": 6345940473142.963, "dur": 40.310, + "args": { + "External id": 983279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2379422, + "ts": 6345940473144.569, "dur": 38.453, + "args": { + "External id": 983280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2379422, + "ts": 6345940473145.976, "dur": 36.751, + "args": { + "External id": 983281,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473342.429, "dur": 21.738, + "args": { + "External id": 983282,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 5873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940473381.658, "dur": 16.491, + "args": { + "External id": 983283,"Record function id": 0, "Sequence number": 10552685, "Fwd thread id": 1, "Ev Idx": 5874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940473383.191, "dur": 11.541, + "args": { + "External id": 983284,"Sequence number": 10552685, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 5875 + } + }, + { + "ph": "f", "id": 235, "pid": 2338706, "tid": 2379422, "ts": 6345940473383.191, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940473387.527, "dur": 6.986, + "args": { + "External id": 983285,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940473389.520, "dur": 4.834, + "args": { + "External id": 983286,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940473401.859, "dur": 82.725, + "args": { + "External id": 983287,"Record function id": 0, "Sequence number": 10552684, "Fwd thread id": 1, "Ev Idx": 5878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940473402.624, "dur": 75.621, + "args": { + "External id": 983288,"Sequence number": 10552684, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5879 + } + }, + { + "ph": "f", "id": 236, "pid": 2338706, "tid": 2379422, "ts": 6345940473402.624, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940473406.031, "dur": 71.940, + "args": { + "External id": 983289,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 5880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345940473409.332, "dur": 25.644, + "args": { + "External id": 983290,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940473411.993, "dur": 3.467, + "args": { + "External id": 983291,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473416.551, "dur": 18.097, + "args": { + "External id": 983292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473421.249, "dur": 12.958, + "args": { + "External id": 983293,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 5884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345940473436.617, "dur": 5.152, + "args": { + "External id": 983294,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 5885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940473440.298, "dur": 1.025, + "args": { + "External id": 983295,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473442.884, "dur": 34.266, + "args": { + "External id": 983296,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940473488.977, "dur": 76.585, + "args": { + "External id": 983297,"Record function id": 0, "Sequence number": 10552683, "Fwd thread id": 1, "Ev Idx": 5888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940473490.144, "dur": 71.845, + "args": { + "External id": 983298,"Sequence number": 10552683, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5889 + } + }, + { + "ph": "f", "id": 237, "pid": 2338706, "tid": 2379422, "ts": 6345940473490.144, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940473493.517, "dur": 68.199, + "args": { + "External id": 983299,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "2"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345940473496.411, "dur": 17.413, + "args": { + "External id": 983300,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940473498.090, "dur": 2.605, + "args": { + "External id": 983301,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473501.699, "dur": 11.871, + "args": { + "External id": 983302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473503.024, "dur": 10.160, + "args": { + "External id": 983303,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2379422, + "ts": 6345940473535.883, "dur": 6.908, + "args": { + "External id": 983304,"Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 5895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940473541.156, "dur": 1.118, + "args": { + "External id": 983305,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473543.909, "dur": 17.112, + "args": { + "External id": 983306,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940473569.820, "dur": 134.337, + "args": { + "External id": 983307,"Record function id": 0, "Sequence number": 10552682, "Fwd thread id": 1, "Ev Idx": 5898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940473570.904, "dur": 128.826, + "args": { + "External id": 983308,"Sequence number": 10552682, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5899 + } + }, + { + "ph": "f", "id": 238, "pid": 2338706, "tid": 2379422, "ts": 6345940473570.904, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940473573.277, "dur": 126.088, + "args": { + "External id": 983309,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345940473575.059, "dur": 21.492, + "args": { + "External id": 983310,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940473576.493, "dur": 4.139, + "args": { + "External id": 983311,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473581.423, "dur": 14.870, + "args": { + "External id": 983312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473582.574, "dur": 13.360, + "args": { + "External id": 983313,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345940473599.967, "dur": 4.113, + "args": { + "External id": 983314,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940473602.950, "dur": 0.893, + "args": { + "External id": 983315,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473605.038, "dur": 93.373, + "args": { + "External id": 983316,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940473710.911, "dur": 107.274, + "args": { + "External id": 983317,"Record function id": 0, "Sequence number": 10552681, "Fwd thread id": 1, "Ev Idx": 5908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940473712.368, "dur": 85.036, + "args": { + "External id": 983318,"Sequence number": 10552681, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5909 + } + }, + { + "ph": "f", "id": 239, "pid": 2338706, "tid": 2379422, "ts": 6345940473712.368, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940473714.829, "dur": 82.314, + "args": { + "External id": 983319,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345940473716.168, "dur": 18.517, + "args": { + "External id": 983320,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940473717.329, "dur": 2.142, + "args": { + "External id": 983321,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473720.174, "dur": 14.255, + "args": { + "External id": 983322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473721.613, "dur": 12.414, + "args": { + "External id": 983323,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345940473737.587, "dur": 2.876, + "args": { + "External id": 983324,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940473739.775, "dur": 0.471, + "args": { + "External id": 983325,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473741.178, "dur": 55.200, + "args": { + "External id": 983326,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473801.998, "dur": 14.751, + "args": { + "External id": 983327,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940473823.100, "dur": 35.332, + "args": { + "External id": 983328,"Record function id": 0, "Sequence number": 10552680, "Fwd thread id": 1, "Ev Idx": 5919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940473824.347, "dur": 1.118, + "args": { + "External id": 983329,"Sequence number": 10552680, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5920 + } + }, + { + "ph": "f", "id": 240, "pid": 2338706, "tid": 2379422, "ts": 6345940473824.347, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940473828.201, "dur": 25.286, + "args": { + "External id": 983330,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940473830.740, "dur": 22.376, + "args": { + "External id": 983331,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940473836.643, "dur": 0.695, + "args": { + "External id": 983332,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940473863.508, "dur": 3493.452, + "args": { + "External id": 983333,"Record function id": 0, "Sequence number": 10552678, "Fwd thread id": 1, "Ev Idx": 5924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940473865.205, "dur": 3458.429, + "args": { + "External id": 983334,"Sequence number": 10552678, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5925 + } + }, + { + "ph": "f", "id": 241, "pid": 2338706, "tid": 2379422, "ts": 6345940473865.205, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940473899.273, "dur": 2.368, + "args": { + "External id": 983335,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940473903.965, "dur": 3188.830, + "args": { + "External id": 983336,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940473905.534, "dur": 3186.940, + "args": { + "External id": 983337,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940473908.734, "dur": 4.049, + "args": { + "External id": 983338,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940473914.045, "dur": 3177.412, + "args": { + "External id": 983339,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338706, "tid": 2379422, + "ts": 6345940477096.855, "dur": 0.395, + "args": { + "External id": 983340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477098.866, "dur": 5.032, + "args": { + "External id": 983341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477102.940, "dur": 0.804, + "args": { + "External id": 983342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2379422, + "ts": 6345940477108.495, "dur": 25.920, + "args": { + "External id": 983343,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338706, "tid": 2379422, + "ts": 6345940477139.809, "dur": 44.155, + "args": { + "External id": 983344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2379422, + "ts": 6345940477141.194, "dur": 42.581, + "args": { + "External id": 983345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2379422, + "ts": 6345940477142.601, "dur": 40.909, + "args": { + "External id": 983346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477335.627, "dur": 16.267, + "args": { + "External id": 983347,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 5938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940477368.436, "dur": 14.581, + "args": { + "External id": 983348,"Record function id": 0, "Sequence number": 10552677, "Fwd thread id": 1, "Ev Idx": 5939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940477370.033, "dur": 9.659, + "args": { + "External id": 983349,"Sequence number": 10552677, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 5940 + } + }, + { + "ph": "f", "id": 242, "pid": 2338706, "tid": 2379422, "ts": 6345940477370.033, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940477373.738, "dur": 5.726, + "args": { + "External id": 983350,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940477375.403, "dur": 3.846, + "args": { + "External id": 983351,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940477386.596, "dur": 79.684, + "args": { + "External id": 983352,"Record function id": 0, "Sequence number": 10552676, "Fwd thread id": 1, "Ev Idx": 5943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940477387.748, "dur": 72.680, + "args": { + "External id": 983353,"Sequence number": 10552676, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5944 + } + }, + { + "ph": "f", "id": 243, "pid": 2338706, "tid": 2379422, "ts": 6345940477387.748, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940477389.594, "dur": 70.412, + "args": { + "External id": 983354,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 5945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345940477395.337, "dur": 21.884, + "args": { + "External id": 983355,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940477397.517, "dur": 3.313, + "args": { + "External id": 983356,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477401.703, "dur": 15.239, + "args": { + "External id": 983357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477403.313, "dur": 13.122, + "args": { + "External id": 983358,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 5949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345940477418.716, "dur": 7.261, + "args": { + "External id": 983359,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 5950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940477421.937, "dur": 3.649, + "args": { + "External id": 983360,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477427.767, "dur": 31.456, + "args": { + "External id": 983361,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940477470.201, "dur": 51.880, + "args": { + "External id": 983362,"Record function id": 0, "Sequence number": 10552675, "Fwd thread id": 1, "Ev Idx": 5953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940477471.353, "dur": 47.270, + "args": { + "External id": 983363,"Sequence number": 10552675, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5954 + } + }, + { + "ph": "f", "id": 244, "pid": 2338706, "tid": 2379422, "ts": 6345940477471.353, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940477474.189, "dur": 44.150, + "args": { + "External id": 983364,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345940477476.332, "dur": 18.698, + "args": { + "External id": 983365,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940477477.840, "dur": 3.404, + "args": { + "External id": 983366,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477484.163, "dur": 10.606, + "args": { + "External id": 983367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477485.096, "dur": 9.347, + "args": { + "External id": 983368,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2379422, + "ts": 6345940477496.267, "dur": 6.219, + "args": { + "External id": 983369,"Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 5960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940477500.823, "dur": 1.186, + "args": { + "External id": 983370,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477503.203, "dur": 14.472, + "args": { + "External id": 983371,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940477525.902, "dur": 110.290, + "args": { + "External id": 983372,"Record function id": 0, "Sequence number": 10552674, "Fwd thread id": 1, "Ev Idx": 5963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940477527.177, "dur": 104.891, + "args": { + "External id": 983373,"Sequence number": 10552674, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5964 + } + }, + { + "ph": "f", "id": 245, "pid": 2338706, "tid": 2379422, "ts": 6345940477527.177, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940477529.411, "dur": 102.208, + "args": { + "External id": 983374,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345940477530.904, "dur": 17.286, + "args": { + "External id": 983375,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940477532.015, "dur": 1.840, + "args": { + "External id": 983376,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477534.970, "dur": 12.946, + "args": { + "External id": 983377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477537.774, "dur": 9.777, + "args": { + "External id": 983378,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345940477549.044, "dur": 3.159, + "args": { + "External id": 983379,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940477551.207, "dur": 0.790, + "args": { + "External id": 983380,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477552.998, "dur": 77.634, + "args": { + "External id": 983381,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940477640.695, "dur": 96.991, + "args": { + "External id": 983382,"Record function id": 0, "Sequence number": 10552673, "Fwd thread id": 1, "Ev Idx": 5973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940477641.871, "dur": 79.028, + "args": { + "External id": 983383,"Sequence number": 10552673, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5974 + } + }, + { + "ph": "f", "id": 246, "pid": 2338706, "tid": 2379422, "ts": 6345940477641.871, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940477644.490, "dur": 76.135, + "args": { + "External id": 983384,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345940477646.234, "dur": 17.225, + "args": { + "External id": 983385,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940477647.588, "dur": 2.078, + "args": { + "External id": 983386,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477650.545, "dur": 12.637, + "args": { + "External id": 983387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477651.366, "dur": 11.489, + "args": { + "External id": 983388,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345940477666.562, "dur": 3.029, + "args": { + "External id": 983389,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940477668.670, "dur": 0.654, + "args": { + "External id": 983390,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477670.504, "dur": 49.230, + "args": { + "External id": 983391,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477724.460, "dur": 11.898, + "args": { + "External id": 983392,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940477742.193, "dur": 39.142, + "args": { + "External id": 983393,"Record function id": 0, "Sequence number": 10552672, "Fwd thread id": 1, "Ev Idx": 5984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940477743.702, "dur": 1.214, + "args": { + "External id": 983394,"Sequence number": 10552672, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5985 + } + }, + { + "ph": "f", "id": 247, "pid": 2338706, "tid": 2379422, "ts": 6345940477743.702, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940477747.313, "dur": 28.405, + "args": { + "External id": 983395,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940477749.200, "dur": 26.034, + "args": { + "External id": 983396,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940477755.001, "dur": 2.570, + "args": { + "External id": 983397,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940477786.411, "dur": 3536.680, + "args": { + "External id": 983398,"Record function id": 0, "Sequence number": 10552671, "Fwd thread id": 1, "Ev Idx": 5989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940477798.447, "dur": 3488.901, + "args": { + "External id": 983399,"Sequence number": 10552671, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5990 + } + }, + { + "ph": "f", "id": 248, "pid": 2338706, "tid": 2379422, "ts": 6345940477798.447, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940477827.945, "dur": 2.479, + "args": { + "External id": 983400,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940477832.875, "dur": 3250.785, + "args": { + "External id": 983401,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940477834.493, "dur": 3248.828, + "args": { + "External id": 983402,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940477837.651, "dur": 3.093, + "args": { + "External id": 983403,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940477841.697, "dur": 3240.697, + "args": { + "External id": 983404,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338706, "tid": 2379422, + "ts": 6345940481087.251, "dur": 0.692, + "args": { + "External id": 983405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338706, "tid": 2379422, + "ts": 6345940481089.586, "dur": 4.475, + "args": { + "External id": 983406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338706, "tid": 2379422, + "ts": 6345940481092.970, "dur": 0.976, + "args": { + "External id": 983407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2379422, + "ts": 6345940481098.118, "dur": 22.145, + "args": { + "External id": 983408,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338706, "tid": 2379422, + "ts": 6345940481125.598, "dur": 40.204, + "args": { + "External id": 983409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2379422, + "ts": 6345940481127.033, "dur": 38.597, + "args": { + "External id": 983410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2379422, + "ts": 6345940481128.312, "dur": 37.038, + "args": { + "External id": 983411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940481301.108, "dur": 17.335, + "args": { + "External id": 983412,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 6003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940481337.395, "dur": 13.586, + "args": { + "External id": 983413,"Record function id": 0, "Ev Idx": 6004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940481340.262, "dur": 8.833, + "args": { + "External id": 983414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 6005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940481343.421, "dur": 4.655, + "args": { + "External id": 983415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 6006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940481344.212, "dur": 3.743, + "args": { + "External id": 983416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 6007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940481354.701, "dur": 13.379, + "args": { + "External id": 983417,"Record function id": 0, "Sequence number": 10552670, "Fwd thread id": 1, "Ev Idx": 6008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940481355.995, "dur": 9.843, + "args": { + "External id": 983418,"Sequence number": 10552670, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6009 + } + }, + { + "ph": "f", "id": 249, "pid": 2338706, "tid": 2379422, "ts": 6345940481355.995, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940481359.642, "dur": 5.966, + "args": { + "External id": 983419,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940481361.844, "dur": 3.601, + "args": { + "External id": 983420,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940481371.418, "dur": 78.064, + "args": { + "External id": 983421,"Record function id": 0, "Sequence number": 10552669, "Fwd thread id": 1, "Ev Idx": 6012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940481372.879, "dur": 70.713, + "args": { + "External id": 983422,"Sequence number": 10552669, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6013 + } + }, + { + "ph": "f", "id": 250, "pid": 2338706, "tid": 2379422, "ts": 6345940481372.879, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940481376.599, "dur": 66.608, + "args": { + "External id": 983423,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345940481382.988, "dur": 25.018, + "args": { + "External id": 983424,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940481385.326, "dur": 3.824, + "args": { + "External id": 983425,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345940481390.148, "dur": 17.580, + "args": { + "External id": 983426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345940481392.006, "dur": 15.294, + "args": { + "External id": 983427,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345940481409.647, "dur": 4.679, + "args": { + "External id": 983428,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 6019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940481412.563, "dur": 1.400, + "args": { + "External id": 983429,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 6020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940481415.616, "dur": 26.630, + "args": { + "External id": 983430,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940481453.449, "dur": 57.705, + "args": { + "External id": 983431,"Record function id": 0, "Sequence number": 10552668, "Fwd thread id": 1, "Ev Idx": 6022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940481454.748, "dur": 52.624, + "args": { + "External id": 983432,"Sequence number": 10552668, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6023 + } + }, + { + "ph": "f", "id": 251, "pid": 2338706, "tid": 2379422, "ts": 6345940481454.748, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940481456.758, "dur": 50.379, + "args": { + "External id": 983433,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 6024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345940481459.251, "dur": 24.957, + "args": { + "External id": 983434,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940481460.667, "dur": 4.676, + "args": { + "External id": 983435,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345940481468.032, "dur": 15.918, + "args": { + "External id": 983436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345940481471.001, "dur": 12.571, + "args": { + "External id": 983437,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 6028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2379422, + "ts": 6345940481485.522, "dur": 5.190, + "args": { + "External id": 983438,"Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940481489.322, "dur": 0.834, + "args": { + "External id": 983439,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940481491.728, "dur": 14.784, + "args": { + "External id": 983440,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940481514.633, "dur": 121.687, + "args": { + "External id": 983441,"Record function id": 0, "Sequence number": 10552667, "Fwd thread id": 1, "Ev Idx": 6032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940481515.899, "dur": 115.830, + "args": { + "External id": 983442,"Sequence number": 10552667, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6033 + } + }, + { + "ph": "f", "id": 252, "pid": 2338706, "tid": 2379422, "ts": 6345940481515.899, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940481518.455, "dur": 112.854, + "args": { + "External id": 983443,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 6034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345940481519.999, "dur": 27.287, + "args": { + "External id": 983444,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940481521.992, "dur": 2.135, + "args": { + "External id": 983445,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345940481529.297, "dur": 17.678, + "args": { + "External id": 983446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345940481534.935, "dur": 11.621, + "args": { + "External id": 983447,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 6038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345940481548.484, "dur": 3.131, + "args": { + "External id": 983448,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 6039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940481550.595, "dur": 0.728, + "args": { + "External id": 983449,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940481552.546, "dur": 77.997, + "args": { + "External id": 983450,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 6041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940481642.387, "dur": 104.278, + "args": { + "External id": 983451,"Record function id": 0, "Sequence number": 10552666, "Fwd thread id": 1, "Ev Idx": 6042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940481643.658, "dur": 84.807, + "args": { + "External id": 983452,"Sequence number": 10552666, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6043 + } + }, + { + "ph": "f", "id": 253, "pid": 2338706, "tid": 2379422, "ts": 6345940481643.658, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940481645.589, "dur": 82.474, + "args": { + "External id": 983453,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 6044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2379422, + "ts": 6345940481647.333, "dur": 22.110, + "args": { + "External id": 983454,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940481648.716, "dur": 2.174, + "args": { + "External id": 983455,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2379422, + "ts": 6345940481651.507, "dur": 17.678, + "args": { + "External id": 983456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2379422, + "ts": 6345940481652.657, "dur": 16.146, + "args": { + "External id": 983457,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 6048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345940481672.294, "dur": 3.292, + "args": { + "External id": 983458,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 6049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940481674.577, "dur": 0.735, + "args": { + "External id": 983459,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940481676.710, "dur": 50.753, + "args": { + "External id": 983460,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 6051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940481732.523, "dur": 12.133, + "args": { + "External id": 983461,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 6052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940481753.536, "dur": 413.018, + "args": { + "External id": 983462,"Record function id": 0, "Sequence number": 10552665, "Fwd thread id": 1, "Ev Idx": 6053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940481755.483, "dur": 398.145, + "args": { + "External id": 983463,"Sequence number": 10552665, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6054 + } + }, + { + "ph": "f", "id": 254, "pid": 2338706, "tid": 2379422, "ts": 6345940481755.483, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345940481920.078, "dur": 42.574, + "args": { + "External id": 983464,"kernel_hash": "cxv3u4whmia24aefmfuiol55m7osx2elddgy6rxbkbvmnigizet6", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "131072", "4096", "1", "993", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xv/cxv3u4whmia24aefmfuiol55m7osx2elddgy6rxbkbvmnigizet6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [4096], [131072, 4096], [131072, 4096], [132, 4096], [131072], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 2338706, "tid": 2379422, + "ts": 6345940481999.828, "dur": 48.049, + "args": { + "External id": 983465,"kernel_hash": "chd4lphpu733jd27rokzqqzh23vkeai2aitqgvmlc7xjqod3vr6m", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/hd/chd4lphpu733jd27rokzqqzh23vkeai2aitqgvmlc7xjqod3vr6m.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 6056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 2338706, "tid": 2379422, + "ts": 6345940482105.299, "dur": 22.253, + "args": { + "External id": 983466,"kernel_hash": "cginwarxhlqgizmvepo2jober6dgvr4lj6cd6nlltcu72kqzvxpx", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/gi/cginwarxhlqgizmvepo2jober6dgvr4lj6cd6nlltcu72kqzvxpx.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 6057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940482178.921, "dur": 13.385, + "args": { + "External id": 983467,"Record function id": 0, "Ev Idx": 6058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940482181.670, "dur": 9.704, + "args": { + "External id": 983468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940482185.168, "dur": 5.198, + "args": { + "External id": 983469,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940482186.756, "dur": 3.489, + "args": { + "External id": 983470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: StackBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940482196.897, "dur": 44.813, + "args": { + "External id": 983471,"Record function id": 0, "Sequence number": 10552664, "Fwd thread id": 1, "Ev Idx": 6062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "StackBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940482197.962, "dur": 32.544, + "args": { + "External id": 983472,"Sequence number": 10552664, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6063 + } + }, + { + "ph": "f", "id": 255, "pid": 2338706, "tid": 2379422, "ts": 6345940482197.962, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2379422, + "ts": 6345940482200.591, "dur": 11.060, + "args": { + "External id": 983473,"Record function id": 0, "Concrete Inputs": ["", "-2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940482207.857, "dur": 1.658, + "args": { + "External id": 983474,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2379422, + "ts": 6345940482212.698, "dur": 4.157, + "args": { + "External id": 983475,"Record function id": 0, "Concrete Inputs": ["", "-2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940482214.919, "dur": 0.725, + "args": { + "External id": 983476,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2379422, + "ts": 6345940482217.549, "dur": 6.086, + "args": { + "External id": 983477,"Record function id": 0, "Concrete Inputs": ["", "-2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940482221.975, "dur": 0.569, + "args": { + "External id": 983478,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2379422, + "ts": 6345940482224.208, "dur": 5.684, + "args": { + "External id": 983479,"Record function id": 0, "Concrete Inputs": ["", "-2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940482226.074, "dur": 2.670, + "args": { + "External id": 983480,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940482245.443, "dur": 6.388, + "args": { + "External id": 983481,"Record function id": 0, "Sequence number": 10552663, "Fwd thread id": 1, "Ev Idx": 6072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940482246.672, "dur": 1.419, + "args": { + "External id": 983482,"Sequence number": 10552663, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6073 + } + }, + { + "ph": "f", "id": 256, "pid": 2338706, "tid": 2379422, "ts": 6345940482246.672, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940482256.498, "dur": 534.634, + "args": { + "External id": 983483,"Record function id": 0, "Sequence number": 10552662, "Fwd thread id": 1, "Ev Idx": 6074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940482258.117, "dur": 518.407, + "args": { + "External id": 983484,"Sequence number": 10552662, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6075 + } + }, + { + "ph": "f", "id": 257, "pid": 2338706, "tid": 2379422, "ts": 6345940482258.117, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940482303.202, "dur": 12.522, + "args": { + "External id": 983485,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940482310.692, "dur": 4.647, + "args": { + "External id": 983486,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 6077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940482321.326, "dur": 7.796, + "args": { + "External id": 983487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940482325.111, "dur": 3.020, + "args": { + "External id": 983488,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940482327.168, "dur": 0.735, + "args": { + "External id": 983489,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2379422, + "ts": 6345940482334.102, "dur": 142.523, + "args": { + "External id": 983490,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 6081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940482338.083, "dur": 6.555, + "args": { + "External id": 983491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 6082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940482338.785, "dur": 4.456, + "args": { + "External id": 983492,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940482340.566, "dur": 2.508, + "args": { + "External id": 983493,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2379422, + "ts": 6345940482361.796, "dur": 113.941, + "args": { + "External id": 983494,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940482364.533, "dur": 109.856, + "args": { + "External id": 983495,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345940482483.293, "dur": 5.557, + "args": { + "External id": 983496,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 6087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940482485.965, "dur": 2.739, + "args": { + "External id": 983497,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940482530.405, "dur": 6.735, + "args": { + "External id": 983498,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940482538.731, "dur": 1.864, + "args": { + "External id": 983499,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940482541.781, "dur": 5.324, + "args": { + "External id": 983500,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940482581.570, "dur": 2.415, + "args": { + "External id": 983501,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940482582.635, "dur": 1.190, + "args": { + "External id": 983502,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338706, "tid": 2379422, + "ts": 6345940482614.474, "dur": 139.697, + "args": { + "External id": 983503,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 6094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2379422, + "ts": 6345940482623.272, "dur": 7.050, + "args": { + "External id": 983504,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940482627.925, "dur": 0.999, + "args": { + "External id": 983505,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940482632.466, "dur": 5.885, + "args": { + "External id": 983506,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 6097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940482636.553, "dur": 0.661, + "args": { + "External id": 983507,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 6098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2379422, + "ts": 6345940482640.292, "dur": 3.074, + "args": { + "External id": 983508,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940482642.369, "dur": 0.647, + "args": { + "External id": 983509,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940482644.772, "dur": 3.443, + "args": { + "External id": 983510,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940482646.722, "dur": 0.557, + "args": { + "External id": 983511,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 6102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940482653.298, "dur": 5.026, + "args": { + "External id": 983512,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 6103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940482657.352, "dur": 0.641, + "args": { + "External id": 983513,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 6104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940482659.561, "dur": 6.165, + "args": { + "External id": 983514,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 6105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940482663.314, "dur": 2.242, + "args": { + "External id": 983515,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 6106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940482669.250, "dur": 5.181, + "args": { + "External id": 983516,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 6107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940482671.364, "dur": 2.721, + "args": { + "External id": 983517,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 6108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940482675.095, "dur": 3.083, + "args": { + "External id": 983518,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940482676.131, "dur": 1.958, + "args": { + "External id": 983519,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345940482679.696, "dur": 61.353, + "args": { + "External id": 983520,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 6111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940482743.109, "dur": 1.286, + "args": { + "External id": 983521,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 6112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940482745.427, "dur": 4.088, + "args": { + "External id": 983522,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 6113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940482747.759, "dur": 0.700, + "args": { + "External id": 983523,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 6114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940482751.806, "dur": 1.245, + "args": { + "External id": 983524,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 6115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940482802.122, "dur": 10.332, + "args": { + "External id": 983525,"Record function id": 0, "Ev Idx": 6116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940482804.457, "dur": 7.034, + "args": { + "External id": 983526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940482806.893, "dur": 3.590, + "args": { + "External id": 983527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940482807.879, "dur": 2.485, + "args": { + "External id": 983528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940482816.306, "dur": 9.386, + "args": { + "External id": 983529,"Record function id": 0, "Sequence number": 10552661, "Fwd thread id": 1, "Ev Idx": 6120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940482817.752, "dur": 4.376, + "args": { + "External id": 983530,"Sequence number": 10552661, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6121 + } + }, + { + "ph": "f", "id": 258, "pid": 2338706, "tid": 2379422, "ts": 6345940482817.752, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940482819.608, "dur": 2.351, + "args": { + "External id": 983531,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940482820.728, "dur": 1.078, + "args": { + "External id": 983532,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940482829.348, "dur": 122.683, + "args": { + "External id": 983533,"Record function id": 0, "Sequence number": 10552660, "Fwd thread id": 1, "Ev Idx": 6124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940482830.773, "dur": 112.753, + "args": { + "External id": 983534,"Sequence number": 10552660, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6125 + } + }, + { + "ph": "f", "id": 259, "pid": 2338706, "tid": 2379422, "ts": 6345940482830.773, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940482835.622, "dur": 6.198, + "args": { + "External id": 983535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940482836.806, "dur": 4.362, + "args": { + "External id": 983536,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940482840.351, "dur": 0.667, + "args": { + "External id": 983537,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940482842.894, "dur": 44.812, + "args": { + "External id": 983538,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940482889.078, "dur": 4.880, + "args": { + "External id": 983539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940482889.842, "dur": 3.489, + "args": { + "External id": 983540,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940482891.624, "dur": 1.538, + "args": { + "External id": 983541,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940482895.848, "dur": 5.685, + "args": { + "External id": 983542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940482897.311, "dur": 3.607, + "args": { + "External id": 983543,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940482900.141, "dur": 0.678, + "args": { + "External id": 983544,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940482902.286, "dur": 40.325, + "args": { + "External id": 983545,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940482956.802, "dur": 8.398, + "args": { + "External id": 983546,"Record function id": 0, "Sequence number": 10552659, "Fwd thread id": 1, "Ev Idx": 6137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940482957.874, "dur": 5.426, + "args": { + "External id": 983547,"Sequence number": 10552659, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6138 + } + }, + { + "ph": "f", "id": 260, "pid": 2338706, "tid": 2379422, "ts": 6345940482957.874, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940482960.409, "dur": 2.735, + "args": { + "External id": 983548,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940482962.090, "dur": 0.899, + "args": { + "External id": 983549,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940482969.185, "dur": 156.364, + "args": { + "External id": 983550,"Record function id": 0, "Sequence number": 10552658, "Fwd thread id": 1, "Ev Idx": 6141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940482970.494, "dur": 150.106, + "args": { + "External id": 983551,"Sequence number": 10552658, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6142 + } + }, + { + "ph": "f", "id": 261, "pid": 2338706, "tid": 2379422, "ts": 6345940482970.494, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940482971.295, "dur": 149.021, + "args": { + "External id": 983552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940483104.557, "dur": 14.709, + "args": { + "External id": 988161,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940483117.937, "dur": 0.936, + "args": { + "External id": 988162,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940483132.755, "dur": 7.654, + "args": { + "External id": 988163,"Record function id": 0, "Ev Idx": 6146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940483134.560, "dur": 5.212, + "args": { + "External id": 988164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940483136.365, "dur": 3.088, + "args": { + "External id": 988165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940483137.430, "dur": 1.892, + "args": { + "External id": 988166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940483143.728, "dur": 7.521, + "args": { + "External id": 988167,"Record function id": 0, "Sequence number": 10552657, "Fwd thread id": 1, "Ev Idx": 6150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940483144.822, "dur": 3.640, + "args": { + "External id": 988168,"Sequence number": 10552657, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6151 + } + }, + { + "ph": "f", "id": 262, "pid": 2338706, "tid": 2379422, "ts": 6345940483144.822, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940483146.129, "dur": 2.175, + "args": { + "External id": 988169,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940483146.886, "dur": 1.328, + "args": { + "External id": 988170,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940483154.633, "dur": 142.840, + "args": { + "External id": 988171,"Record function id": 0, "Sequence number": 10552656, "Fwd thread id": 1, "Ev Idx": 6154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940483157.642, "dur": 128.136, + "args": { + "External id": 988172,"Sequence number": 10552656, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6155 + } + }, + { + "ph": "f", "id": 263, "pid": 2338706, "tid": 2379422, "ts": 6345940483157.642, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940483161.038, "dur": 3.124, + "args": { + "External id": 988173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940483161.746, "dur": 1.882, + "args": { + "External id": 988174,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940483162.975, "dur": 0.539, + "args": { + "External id": 988175,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940483164.945, "dur": 62.819, + "args": { + "External id": 988176,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940483229.029, "dur": 5.872, + "args": { + "External id": 988177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940483229.814, "dur": 4.503, + "args": { + "External id": 988178,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940483233.670, "dur": 0.525, + "args": { + "External id": 988179,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940483236.022, "dur": 5.340, + "args": { + "External id": 988180,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940483236.744, "dur": 3.967, + "args": { + "External id": 988181,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940483237.917, "dur": 2.730, + "args": { + "External id": 988182,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940483241.762, "dur": 43.265, + "args": { + "External id": 988183,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940483303.531, "dur": 39.793, + "args": { + "External id": 988184,"Record function id": 0, "Sequence number": 10552655, "Fwd thread id": 1, "Ev Idx": 6167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940483304.639, "dur": 5.867, + "args": { + "External id": 988185,"Sequence number": 10552655, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6168 + } + }, + { + "ph": "f", "id": 264, "pid": 2338706, "tid": 2379422, "ts": 6345940483304.639, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940483306.143, "dur": 4.180, + "args": { + "External id": 988186,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940483307.174, "dur": 2.966, + "args": { + "External id": 988187,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345940483314.002, "dur": 26.461, + "args": { + "External id": 988188,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940483347.614, "dur": 10.400, + "args": { + "External id": 988189,"Record function id": 0, "Sequence number": 10552654, "Fwd thread id": 1, "Ev Idx": 6172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940483348.415, "dur": 7.412, + "args": { + "External id": 988190,"Sequence number": 10552654, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6173 + } + }, + { + "ph": "f", "id": 265, "pid": 2338706, "tid": 2379422, "ts": 6345940483348.415, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940483351.487, "dur": 4.114, + "args": { + "External id": 988191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940483352.440, "dur": 2.547, + "args": { + "External id": 988192,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940483354.177, "dur": 0.657, + "args": { + "External id": 988193,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940483362.148, "dur": 4.986, + "args": { + "External id": 988194,"Record function id": 0, "Ev Idx": 6177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940483363.521, "dur": 3.095, + "args": { + "External id": 988195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940483364.436, "dur": 1.850, + "args": { + "External id": 988196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940483364.906, "dur": 1.231, + "args": { + "External id": 988197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940483371.803, "dur": 457.445, + "args": { + "External id": 988198,"Record function id": 0, "Sequence number": 10552653, "Fwd thread id": 1, "Ev Idx": 6181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940483373.565, "dur": 436.667, + "args": { + "External id": 988199,"Sequence number": 10552653, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 6182 + } + }, + { + "ph": "f", "id": 266, "pid": 2338706, "tid": 2379422, "ts": 6345940483373.565, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2379422, + "ts": 6345940483406.740, "dur": 37.070, + "args": { + "External id": 988200,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345940483408.712, "dur": 34.831, + "args": { + "External id": 988201,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940483412.101, "dur": 7.213, + "args": { + "External id": 988202,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940483415.549, "dur": 3.162, + "args": { + "External id": 988203,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940483420.691, "dur": 22.337, + "args": { + "External id": 988204,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940483456.565, "dur": 3.932, + "args": { + "External id": 988205,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940483459.128, "dur": 1.225, + "args": { + "External id": 988206,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940483464.712, "dur": 1.513, + "args": { + "External id": 988207,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940483465.444, "dur": 0.688, + "args": { + "External id": 988208,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940483479.848, "dur": 4.669, + "args": { + "External id": 988209,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940483501.047, "dur": 3.110, + "args": { + "External id": 988210,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940483697.188, "dur": 2.589, + "args": { + "External id": 988211,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940483704.327, "dur": 34.962, + "args": { + "External id": 988212,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940483716.378, "dur": 0.864, + "args": { + "External id": 988213,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940483745.586, "dur": 34.091, + "args": { + "External id": 988214,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940483747.489, "dur": 31.978, + "args": { + "External id": 988215,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940483752.064, "dur": 4.379, + "args": { + "External id": 988216,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940483760.190, "dur": 18.786, + "args": { + "External id": 988217,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345940483784.527, "dur": 2.611, + "args": { + "External id": 988218,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940483785.976, "dur": 1.033, + "args": { + "External id": 988219,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940483793.954, "dur": 2.433, + "args": { + "External id": 988220,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940483795.302, "dur": 0.974, + "args": { + "External id": 988221,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940483799.002, "dur": 2.074, + "args": { + "External id": 988222,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940483800.229, "dur": 0.755, + "args": { + "External id": 988223,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940483839.504, "dur": 10.465, + "args": { + "External id": 988224,"Record function id": 0, "Ev Idx": 6207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940483841.550, "dur": 7.657, + "args": { + "External id": 988225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940483843.420, "dur": 4.789, + "args": { + "External id": 988226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940483844.779, "dur": 3.320, + "args": { + "External id": 988227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940483853.607, "dur": 9.560, + "args": { + "External id": 988228,"Record function id": 0, "Sequence number": 10552652, "Fwd thread id": 1, "Ev Idx": 6211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940483854.889, "dur": 4.295, + "args": { + "External id": 988229,"Sequence number": 10552652, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6212 + } + }, + { + "ph": "f", "id": 267, "pid": 2338706, "tid": 2379422, "ts": 6345940483854.889, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940483856.640, "dur": 2.356, + "args": { + "External id": 988230,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940483857.681, "dur": 1.187, + "args": { + "External id": 988231,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940483866.669, "dur": 168.900, + "args": { + "External id": 988232,"Record function id": 0, "Sequence number": 10552651, "Fwd thread id": 1, "Ev Idx": 6215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940483867.960, "dur": 160.296, + "args": { + "External id": 988233,"Sequence number": 10552651, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6216 + } + }, + { + "ph": "f", "id": 268, "pid": 2338706, "tid": 2379422, "ts": 6345940483867.960, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940483874.069, "dur": 4.340, + "args": { + "External id": 988234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940483875.461, "dur": 2.405, + "args": { + "External id": 988235,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940483876.922, "dur": 0.738, + "args": { + "External id": 988236,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940483879.830, "dur": 71.171, + "args": { + "External id": 988237,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940483952.167, "dur": 4.078, + "args": { + "External id": 988238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940483953.037, "dur": 2.608, + "args": { + "External id": 988239,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940483954.617, "dur": 0.883, + "args": { + "External id": 988240,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940483960.292, "dur": 5.969, + "args": { + "External id": 988241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940483961.186, "dur": 4.438, + "args": { + "External id": 988242,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940483962.480, "dur": 3.070, + "args": { + "External id": 988243,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940483966.962, "dur": 59.670, + "args": { + "External id": 988244,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940484042.936, "dur": 9.025, + "args": { + "External id": 988245,"Record function id": 0, "Sequence number": 10552650, "Fwd thread id": 1, "Ev Idx": 6228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940484044.197, "dur": 5.243, + "args": { + "External id": 988246,"Sequence number": 10552650, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6229 + } + }, + { + "ph": "f", "id": 269, "pid": 2338706, "tid": 2379422, "ts": 6345940484044.197, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940484046.409, "dur": 2.873, + "args": { + "External id": 988247,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940484047.590, "dur": 1.530, + "args": { + "External id": 988248,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940484094.532, "dur": 13.399, + "args": { + "External id": 988249,"Record function id": 0, "Sequence number": 10552649, "Fwd thread id": 1, "Ev Idx": 6232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940484095.946, "dur": 8.764, + "args": { + "External id": 988250,"Sequence number": 10552649, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6233 + } + }, + { + "ph": "f", "id": 270, "pid": 2338706, "tid": 2379422, "ts": 6345940484095.946, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940484097.349, "dur": 7.114, + "args": { + "External id": 988251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940484098.426, "dur": 5.344, + "args": { + "External id": 988252,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940484102.717, "dur": 0.863, + "args": { + "External id": 988253,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940484112.213, "dur": 7.899, + "args": { + "External id": 988254,"Record function id": 0, "Ev Idx": 6237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940484113.857, "dur": 5.750, + "args": { + "External id": 988255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940484116.264, "dur": 2.967, + "args": { + "External id": 988256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940484117.416, "dur": 1.711, + "args": { + "External id": 988257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940484123.067, "dur": 8.218, + "args": { + "External id": 988258,"Record function id": 0, "Sequence number": 10552648, "Fwd thread id": 1, "Ev Idx": 6241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940484124.167, "dur": 4.041, + "args": { + "External id": 988259,"Sequence number": 10552648, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6242 + } + }, + { + "ph": "f", "id": 271, "pid": 2338706, "tid": 2379422, "ts": 6345940484124.167, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940484125.663, "dur": 2.412, + "args": { + "External id": 988260,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940484126.532, "dur": 1.387, + "args": { + "External id": 988261,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940484136.716, "dur": 457.274, + "args": { + "External id": 988262,"Record function id": 0, "Sequence number": 10552647, "Fwd thread id": 1, "Ev Idx": 6245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940484138.293, "dur": 432.325, + "args": { + "External id": 988263,"Sequence number": 10552647, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6246 + } + }, + { + "ph": "f", "id": 272, "pid": 2338706, "tid": 2379422, "ts": 6345940484138.293, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940484159.227, "dur": 9.190, + "args": { + "External id": 988264,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940484162.310, "dur": 5.533, + "args": { + "External id": 988265,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940484171.265, "dur": 5.138, + "args": { + "External id": 988266,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940484173.363, "dur": 2.814, + "args": { + "External id": 988267,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940484178.450, "dur": 6.694, + "args": { + "External id": 988268,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940484180.081, "dur": 4.846, + "args": { + "External id": 988269,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940484254.184, "dur": 287.931, + "args": { + "External id": 988270,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 6253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940484361.615, "dur": 3.734, + "args": { + "External id": 988271,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940484367.133, "dur": 2.149, + "args": { + "External id": 988272,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940484370.664, "dur": 2.251, + "args": { + "External id": 988273,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940484374.191, "dur": 2.697, + "args": { + "External id": 988274,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940484439.130, "dur": 4.651, + "args": { + "External id": 988275,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940484442.134, "dur": 1.427, + "args": { + "External id": 988276,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940484448.038, "dur": 28.949, + "args": { + "External id": 988277,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940484454.690, "dur": 0.772, + "args": { + "External id": 988278,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940484478.499, "dur": 1.719, + "args": { + "External id": 988279,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940484479.273, "dur": 0.858, + "args": { + "External id": 988280,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940484481.261, "dur": 14.002, + "args": { + "External id": 988281,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940484483.182, "dur": 0.671, + "args": { + "External id": 988282,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940484557.214, "dur": 3.832, + "args": { + "External id": 988283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940484564.076, "dur": 0.701, + "args": { + "External id": 988284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940484566.861, "dur": 0.713, + "args": { + "External id": 988285,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940484603.027, "dur": 251.811, + "args": { + "External id": 988286,"Record function id": 0, "Sequence number": 10552646, "Fwd thread id": 1, "Ev Idx": 6269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940484604.589, "dur": 241.823, + "args": { + "External id": 988287,"Sequence number": 10552646, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6270 + } + }, + { + "ph": "f", "id": 273, "pid": 2338706, "tid": 2379422, "ts": 6345940484604.589, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345940484629.927, "dur": 47.277, + "args": { + "External id": 988288,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940484633.769, "dur": 3.329, + "args": { + "External id": 988289,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940484638.660, "dur": 37.847, + "args": { + "External id": 988290,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 6273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940484688.442, "dur": 4.643, + "args": { + "External id": 988291,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940484690.538, "dur": 2.297, + "args": { + "External id": 988292,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940484862.050, "dur": 256.469, + "args": { + "External id": 988293,"Record function id": 0, "Sequence number": 10552645, "Fwd thread id": 1, "Ev Idx": 6276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940484864.016, "dur": 243.801, + "args": { + "External id": 988294,"Sequence number": 10552645, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6277 + } + }, + { + "ph": "f", "id": 274, "pid": 2338706, "tid": 2379422, "ts": 6345940484864.016, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345940484876.273, "dur": 51.941, + "args": { + "External id": 988295,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940484879.374, "dur": 3.004, + "args": { + "External id": 988296,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940484883.873, "dur": 43.795, + "args": { + "External id": 988297,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 6280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940484935.595, "dur": 8.174, + "args": { + "External id": 988298,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940484937.708, "dur": 5.739, + "args": { + "External id": 988299,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485128.380, "dur": 16.531, + "args": { + "External id": 988300,"Record function id": 0, "Sequence number": 10552644, "Fwd thread id": 1, "Ev Idx": 6283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485130.453, "dur": 10.965, + "args": { + "External id": 988301,"Sequence number": 10552644, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6284 + } + }, + { + "ph": "f", "id": 275, "pid": 2338706, "tid": 2379422, "ts": 6345940485130.453, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940485133.627, "dur": 7.533, + "args": { + "External id": 988302,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940485135.656, "dur": 5.336, + "args": { + "External id": 988303,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485148.155, "dur": 7.714, + "args": { + "External id": 988304,"Record function id": 0, "Sequence number": 10552643, "Fwd thread id": 1, "Ev Idx": 6287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485149.364, "dur": 4.304, + "args": { + "External id": 988305,"Sequence number": 10552643, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6288 + } + }, + { + "ph": "f", "id": 276, "pid": 2338706, "tid": 2379422, "ts": 6345940485149.364, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940485151.020, "dur": 2.500, + "args": { + "External id": 988306,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940485152.385, "dur": 1.020, + "args": { + "External id": 988307,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485159.197, "dur": 24.235, + "args": { + "External id": 988308,"Record function id": 0, "Sequence number": 10552642, "Fwd thread id": 1, "Ev Idx": 6291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485174.083, "dur": 6.529, + "args": { + "External id": 988309,"Sequence number": 10552642, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6292 + } + }, + { + "ph": "f", "id": 277, "pid": 2338706, "tid": 2379422, "ts": 6345940485174.083, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940485178.025, "dur": 2.451, + "args": { + "External id": 988310,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940485178.938, "dur": 1.387, + "args": { + "External id": 988311,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485187.025, "dur": 8.364, + "args": { + "External id": 988312,"Record function id": 0, "Sequence number": 10552641, "Fwd thread id": 1, "Ev Idx": 6295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485188.359, "dur": 4.089, + "args": { + "External id": 988313,"Sequence number": 10552641, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6296 + } + }, + { + "ph": "f", "id": 278, "pid": 2338706, "tid": 2379422, "ts": 6345940485188.359, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940485190.052, "dur": 2.254, + "args": { + "External id": 988314,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940485191.171, "dur": 0.979, + "args": { + "External id": 988315,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485198.596, "dur": 169.907, + "args": { + "External id": 988316,"Record function id": 0, "Sequence number": 10552640, "Fwd thread id": 1, "Ev Idx": 6299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485199.478, "dur": 161.071, + "args": { + "External id": 988317,"Sequence number": 10552640, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6300 + } + }, + { + "ph": "f", "id": 279, "pid": 2338706, "tid": 2379422, "ts": 6345940485199.478, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940485204.453, "dur": 10.113, + "args": { + "External id": 988318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940485208.638, "dur": 5.165, + "args": { + "External id": 988319,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940485211.363, "dur": 2.056, + "args": { + "External id": 988320,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940485216.301, "dur": 76.828, + "args": { + "External id": 988321,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940485294.478, "dur": 6.563, + "args": { + "External id": 988322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940485295.230, "dur": 5.054, + "args": { + "External id": 988323,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940485297.556, "dur": 2.553, + "args": { + "External id": 988324,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940485304.850, "dur": 5.816, + "args": { + "External id": 988325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940485305.981, "dur": 4.189, + "args": { + "External id": 988326,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940485307.301, "dur": 2.752, + "args": { + "External id": 988327,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940485311.562, "dur": 47.836, + "args": { + "External id": 988328,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485373.254, "dur": 8.180, + "args": { + "External id": 988329,"Record function id": 0, "Sequence number": 10552639, "Fwd thread id": 1, "Ev Idx": 6312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485374.256, "dur": 4.587, + "args": { + "External id": 988330,"Sequence number": 10552639, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6313 + } + }, + { + "ph": "f", "id": 280, "pid": 2338706, "tid": 2379422, "ts": 6345940485374.256, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940485376.115, "dur": 2.586, + "args": { + "External id": 988331,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940485377.562, "dur": 0.994, + "args": { + "External id": 988332,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485384.907, "dur": 11.154, + "args": { + "External id": 988333,"Record function id": 0, "Sequence number": 10552638, "Fwd thread id": 1, "Ev Idx": 6316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485385.795, "dur": 7.383, + "args": { + "External id": 988334,"Sequence number": 10552638, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6317 + } + }, + { + "ph": "f", "id": 281, "pid": 2338706, "tid": 2379422, "ts": 6345940485385.795, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940485389.196, "dur": 3.775, + "args": { + "External id": 988335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940485390.039, "dur": 2.396, + "args": { + "External id": 988336,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940485391.783, "dur": 0.531, + "args": { + "External id": 988337,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940485402.201, "dur": 9.562, + "args": { + "External id": 988338,"Record function id": 0, "Ev Idx": 6321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940485403.928, "dur": 6.983, + "args": { + "External id": 988339,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940485406.465, "dur": 3.865, + "args": { + "External id": 988340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940485407.417, "dur": 2.811, + "args": { + "External id": 988341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485414.888, "dur": 6.955, + "args": { + "External id": 988342,"Record function id": 0, "Sequence number": 10552637, "Fwd thread id": 1, "Ev Idx": 6325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485416.265, "dur": 3.147, + "args": { + "External id": 988343,"Sequence number": 10552637, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6326 + } + }, + { + "ph": "f", "id": 282, "pid": 2338706, "tid": 2379422, "ts": 6345940485416.265, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940485417.614, "dur": 1.657, + "args": { + "External id": 988344,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940485418.506, "dur": 0.677, + "args": { + "External id": 988345,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485426.202, "dur": 102.539, + "args": { + "External id": 988346,"Record function id": 0, "Sequence number": 10552636, "Fwd thread id": 1, "Ev Idx": 6329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485429.567, "dur": 90.816, + "args": { + "External id": 988347,"Sequence number": 10552636, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6330 + } + }, + { + "ph": "f", "id": 283, "pid": 2338706, "tid": 2379422, "ts": 6345940485429.567, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940485432.809, "dur": 2.736, + "args": { + "External id": 988348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940485433.266, "dur": 1.836, + "args": { + "External id": 988349,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940485434.545, "dur": 0.453, + "args": { + "External id": 988350,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940485436.531, "dur": 27.977, + "args": { + "External id": 988351,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940485465.634, "dur": 12.252, + "args": { + "External id": 988352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940485466.257, "dur": 11.112, + "args": { + "External id": 988353,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940485473.181, "dur": 4.048, + "args": { + "External id": 988354,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940485479.507, "dur": 3.354, + "args": { + "External id": 988355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940485480.357, "dur": 1.890, + "args": { + "External id": 988356,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940485481.693, "dur": 0.468, + "args": { + "External id": 988357,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940485483.304, "dur": 36.181, + "args": { + "External id": 988358,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485533.656, "dur": 35.592, + "args": { + "External id": 988359,"Record function id": 0, "Sequence number": 10552635, "Fwd thread id": 1, "Ev Idx": 6342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485534.801, "dur": 5.326, + "args": { + "External id": 988360,"Sequence number": 10552635, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6343 + } + }, + { + "ph": "f", "id": 284, "pid": 2338706, "tid": 2379422, "ts": 6345940485534.801, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940485537.483, "dur": 2.511, + "args": { + "External id": 988361,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940485538.624, "dur": 1.221, + "args": { + "External id": 988362,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345940485543.225, "dur": 23.306, + "args": { + "External id": 988363,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485575.683, "dur": 7.810, + "args": { + "External id": 988364,"Record function id": 0, "Sequence number": 10552634, "Fwd thread id": 1, "Ev Idx": 6347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485576.788, "dur": 4.259, + "args": { + "External id": 988365,"Sequence number": 10552634, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6348 + } + }, + { + "ph": "f", "id": 285, "pid": 2338706, "tid": 2379422, "ts": 6345940485576.788, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940485577.605, "dur": 3.217, + "args": { + "External id": 988366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940485578.142, "dur": 2.126, + "args": { + "External id": 988367,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940485579.625, "dur": 0.543, + "args": { + "External id": 988368,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940485587.680, "dur": 5.210, + "args": { + "External id": 988369,"Record function id": 0, "Ev Idx": 6352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940485588.960, "dur": 3.308, + "args": { + "External id": 988370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940485590.148, "dur": 1.704, + "args": { + "External id": 988371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940485590.618, "dur": 1.130, + "args": { + "External id": 988372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485595.894, "dur": 9.675, + "args": { + "External id": 988373,"Record function id": 0, "Sequence number": 10552633, "Fwd thread id": 1, "Ev Idx": 6356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485597.539, "dur": 5.775, + "args": { + "External id": 988374,"Sequence number": 10552633, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6357 + } + }, + { + "ph": "f", "id": 286, "pid": 2338706, "tid": 2379422, "ts": 6345940485597.539, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940485599.230, "dur": 3.924, + "args": { + "External id": 988375,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940485602.160, "dur": 0.875, + "args": { + "External id": 988376,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485608.683, "dur": 103.126, + "args": { + "External id": 988377,"Record function id": 0, "Sequence number": 10552632, "Fwd thread id": 1, "Ev Idx": 6360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485609.508, "dur": 93.401, + "args": { + "External id": 988378,"Sequence number": 10552632, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6361 + } + }, + { + "ph": "f", "id": 287, "pid": 2338706, "tid": 2379422, "ts": 6345940485609.508, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940485612.986, "dur": 5.546, + "args": { + "External id": 988379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940485613.407, "dur": 4.654, + "args": { + "External id": 988380,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940485615.109, "dur": 2.843, + "args": { + "External id": 988381,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940485621.167, "dur": 35.721, + "args": { + "External id": 988382,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940485658.115, "dur": 4.333, + "args": { + "External id": 988383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940485658.679, "dur": 3.229, + "args": { + "External id": 988384,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940485660.540, "dur": 1.251, + "args": { + "External id": 988385,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940485663.963, "dur": 3.751, + "args": { + "External id": 988386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940485664.948, "dur": 2.100, + "args": { + "External id": 988387,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940485666.495, "dur": 0.486, + "args": { + "External id": 988388,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940485670.135, "dur": 32.028, + "args": { + "External id": 988389,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485716.029, "dur": 28.463, + "args": { + "External id": 988390,"Record function id": 0, "Sequence number": 10552631, "Fwd thread id": 1, "Ev Idx": 6373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485717.138, "dur": 4.723, + "args": { + "External id": 988391,"Sequence number": 10552631, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6374 + } + }, + { + "ph": "f", "id": 288, "pid": 2338706, "tid": 2379422, "ts": 6345940485717.138, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940485719.123, "dur": 2.592, + "args": { + "External id": 988392,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940485720.568, "dur": 1.017, + "args": { + "External id": 988393,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940485724.084, "dur": 18.125, + "args": { + "External id": 988394,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485748.768, "dur": 10.878, + "args": { + "External id": 988395,"Record function id": 0, "Sequence number": 10552630, "Fwd thread id": 1, "Ev Idx": 6378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940485749.725, "dur": 7.278, + "args": { + "External id": 988396,"Sequence number": 10552630, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6379 + } + }, + { + "ph": "f", "id": 289, "pid": 2338706, "tid": 2379422, "ts": 6345940485749.725, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940485750.941, "dur": 5.870, + "args": { + "External id": 988397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940485751.671, "dur": 4.597, + "args": { + "External id": 988398,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940485755.654, "dur": 0.501, + "args": { + "External id": 988399,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940485763.722, "dur": 8.116, + "args": { + "External id": 988400,"Record function id": 0, "Ev Idx": 6383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940485765.114, "dur": 6.214, + "args": { + "External id": 988401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940485766.191, "dur": 4.604, + "args": { + "External id": 988402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940485767.236, "dur": 3.472, + "args": { + "External id": 988403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940485775.763, "dur": 441.953, + "args": { + "External id": 988404,"Record function id": 0, "Sequence number": 10552629, "Fwd thread id": 1, "Ev Idx": 6387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940485777.405, "dur": 401.143, + "args": { + "External id": 988405,"Sequence number": 10552629, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6388 + } + }, + { + "ph": "f", "id": 290, "pid": 2338706, "tid": 2379422, "ts": 6345940485777.405, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940485812.336, "dur": 2.185, + "args": { + "External id": 988406,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940485813.088, "dur": 1.291, + "args": { + "External id": 988407,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940485832.194, "dur": 3.936, + "args": { + "External id": 988408,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940485846.289, "dur": 2.050, + "args": { + "External id": 988409,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940486004.749, "dur": 1.808, + "args": { + "External id": 988410,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940486029.471, "dur": 77.348, + "args": { + "External id": 988411,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486040.943, "dur": 1.073, + "args": { + "External id": 988412,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940486115.105, "dur": 35.086, + "args": { + "External id": 988413,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940486117.190, "dur": 32.791, + "args": { + "External id": 988414,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486123.595, "dur": 5.043, + "args": { + "External id": 988415,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940486130.232, "dur": 19.220, + "args": { + "External id": 988416,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345940486155.097, "dur": 3.051, + "args": { + "External id": 988417,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940486156.609, "dur": 1.433, + "args": { + "External id": 988418,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940486165.189, "dur": 4.689, + "args": { + "External id": 988419,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940486166.292, "dur": 3.444, + "args": { + "External id": 988420,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345940486191.460, "dur": 19.098, + "args": { + "External id": 988421,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940486231.445, "dur": 12.911, + "args": { + "External id": 988422,"Record function id": 0, "Ev Idx": 6405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940486234.224, "dur": 9.336, + "args": { + "External id": 988423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940486237.105, "dur": 5.221, + "args": { + "External id": 988424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940486238.015, "dur": 4.196, + "args": { + "External id": 988425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940486248.148, "dur": 8.606, + "args": { + "External id": 988426,"Record function id": 0, "Sequence number": 10552628, "Fwd thread id": 1, "Ev Idx": 6409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940486249.423, "dur": 1.945, + "args": { + "External id": 988427,"Sequence number": 10552628, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6410 + } + }, + { + "ph": "f", "id": 291, "pid": 2338706, "tid": 2379422, "ts": 6345940486249.423, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940486261.006, "dur": 422.541, + "args": { + "External id": 988428,"Record function id": 0, "Sequence number": 10552627, "Fwd thread id": 1, "Ev Idx": 6411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940486262.106, "dur": 408.904, + "args": { + "External id": 988429,"Sequence number": 10552627, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6412 + } + }, + { + "ph": "f", "id": 292, "pid": 2338706, "tid": 2379422, "ts": 6345940486262.106, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940486298.158, "dur": 8.482, + "args": { + "External id": 988430,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940486303.237, "dur": 3.164, + "args": { + "External id": 988431,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 6414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940486309.923, "dur": 5.717, + "args": { + "External id": 988432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940486311.732, "dur": 3.335, + "args": { + "External id": 988433,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486313.763, "dur": 1.140, + "args": { + "External id": 988434,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2379422, + "ts": 6345940486319.003, "dur": 91.653, + "args": { + "External id": 988435,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 6418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940486322.080, "dur": 3.615, + "args": { + "External id": 988436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 6419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940486322.777, "dur": 2.281, + "args": { + "External id": 988437,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486324.234, "dur": 0.678, + "args": { + "External id": 988438,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2379422, + "ts": 6345940486326.834, "dur": 83.330, + "args": { + "External id": 988439,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940486328.286, "dur": 80.832, + "args": { + "External id": 988440,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345940486414.541, "dur": 3.011, + "args": { + "External id": 988441,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 6424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940486416.010, "dur": 1.420, + "args": { + "External id": 988442,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940486455.279, "dur": 6.308, + "args": { + "External id": 988443,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940486462.945, "dur": 2.455, + "args": { + "External id": 988444,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940486466.469, "dur": 2.431, + "args": { + "External id": 988445,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940486502.452, "dur": 2.378, + "args": { + "External id": 988446,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940486503.530, "dur": 1.158, + "args": { + "External id": 988447,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338706, "tid": 2379422, + "ts": 6345940486529.839, "dur": 120.189, + "args": { + "External id": 988448,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 6431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2379422, + "ts": 6345940486537.252, "dur": 5.146, + "args": { + "External id": 988449,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486540.622, "dur": 0.880, + "args": { + "External id": 988450,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940486544.160, "dur": 4.668, + "args": { + "External id": 988451,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 6434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486547.247, "dur": 0.467, + "args": { + "External id": 988452,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 6435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2379422, + "ts": 6345940486550.148, "dur": 2.361, + "args": { + "External id": 988453,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486551.818, "dur": 0.336, + "args": { + "External id": 988454,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940486553.412, "dur": 3.136, + "args": { + "External id": 988455,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486555.354, "dur": 0.720, + "args": { + "External id": 988456,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 6439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940486560.011, "dur": 7.745, + "args": { + "External id": 988457,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 6440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486564.542, "dur": 2.842, + "args": { + "External id": 988458,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 6441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940486569.072, "dur": 6.178, + "args": { + "External id": 988459,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 6442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940486573.256, "dur": 1.782, + "args": { + "External id": 988460,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 6443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940486578.828, "dur": 2.650, + "args": { + "External id": 988461,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 6444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486580.218, "dur": 0.908, + "args": { + "External id": 988462,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 6445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940486582.389, "dur": 3.589, + "args": { + "External id": 988463,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940486583.944, "dur": 1.947, + "args": { + "External id": 988464,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345940486587.036, "dur": 51.455, + "args": { + "External id": 988465,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 6448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940486640.160, "dur": 1.161, + "args": { + "External id": 988466,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 6449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940486642.162, "dur": 3.970, + "args": { + "External id": 988467,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 6450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486644.624, "dur": 0.492, + "args": { + "External id": 988468,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 6451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940486648.261, "dur": 0.713, + "args": { + "External id": 988469,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 6452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940486693.079, "dur": 10.622, + "args": { + "External id": 988470,"Record function id": 0, "Ev Idx": 6453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940486694.962, "dur": 7.956, + "args": { + "External id": 988471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940486696.610, "dur": 5.377, + "args": { + "External id": 988472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940486697.471, "dur": 4.407, + "args": { + "External id": 988473,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940486707.245, "dur": 6.913, + "args": { + "External id": 988474,"Record function id": 0, "Sequence number": 10552626, "Fwd thread id": 1, "Ev Idx": 6457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940486708.557, "dur": 3.554, + "args": { + "External id": 988475,"Sequence number": 10552626, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6458 + } + }, + { + "ph": "f", "id": 293, "pid": 2338706, "tid": 2379422, "ts": 6345940486708.557, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940486710.246, "dur": 1.658, + "args": { + "External id": 988476,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940486711.028, "dur": 0.736, + "args": { + "External id": 988477,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940486717.418, "dur": 113.198, + "args": { + "External id": 988478,"Record function id": 0, "Sequence number": 10552625, "Fwd thread id": 1, "Ev Idx": 6461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940486718.671, "dur": 105.270, + "args": { + "External id": 988479,"Sequence number": 10552625, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6462 + } + }, + { + "ph": "f", "id": 294, "pid": 2338706, "tid": 2379422, "ts": 6345940486718.671, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940486721.555, "dur": 6.043, + "args": { + "External id": 988480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940486722.647, "dur": 4.378, + "args": { + "External id": 988481,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486726.369, "dur": 0.523, + "args": { + "External id": 988482,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940486728.582, "dur": 46.707, + "args": { + "External id": 988483,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940486776.653, "dur": 3.955, + "args": { + "External id": 988484,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940486777.345, "dur": 2.725, + "args": { + "External id": 988485,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486779.113, "dur": 0.825, + "args": { + "External id": 988486,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940486782.578, "dur": 4.884, + "args": { + "External id": 988487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940486783.437, "dur": 3.487, + "args": { + "External id": 988488,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486786.585, "dur": 0.274, + "args": { + "External id": 988489,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940486788.083, "dur": 34.895, + "args": { + "External id": 988490,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940486835.336, "dur": 7.274, + "args": { + "External id": 988491,"Record function id": 0, "Sequence number": 10552624, "Fwd thread id": 1, "Ev Idx": 6474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940486836.337, "dur": 4.934, + "args": { + "External id": 988492,"Sequence number": 10552624, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6475 + } + }, + { + "ph": "f", "id": 295, "pid": 2338706, "tid": 2379422, "ts": 6345940486836.337, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940486838.551, "dur": 2.570, + "args": { + "External id": 988493,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940486839.799, "dur": 1.167, + "args": { + "External id": 988494,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940486845.995, "dur": 13.519, + "args": { + "External id": 988495,"Record function id": 0, "Sequence number": 10552623, "Fwd thread id": 1, "Ev Idx": 6478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940486846.954, "dur": 8.918, + "args": { + "External id": 988496,"Sequence number": 10552623, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6479 + } + }, + { + "ph": "f", "id": 296, "pid": 2338706, "tid": 2379422, "ts": 6345940486846.954, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940486848.029, "dur": 7.616, + "args": { + "External id": 988497,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940486851.045, "dur": 4.113, + "args": { + "External id": 988498,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486852.470, "dur": 2.611, + "args": { + "External id": 988499,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940486863.552, "dur": 5.222, + "args": { + "External id": 988500,"Record function id": 0, "Ev Idx": 6483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940486865.184, "dur": 3.118, + "args": { + "External id": 988501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940486866.105, "dur": 1.972, + "args": { + "External id": 988502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940486866.999, "dur": 0.986, + "args": { + "External id": 988503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940486871.712, "dur": 6.287, + "args": { + "External id": 988504,"Record function id": 0, "Sequence number": 10552622, "Fwd thread id": 1, "Ev Idx": 6487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940486872.870, "dur": 2.789, + "args": { + "External id": 988505,"Sequence number": 10552622, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6488 + } + }, + { + "ph": "f", "id": 297, "pid": 2338706, "tid": 2379422, "ts": 6345940486872.870, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940486874.137, "dur": 1.378, + "args": { + "External id": 988506,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940486874.733, "dur": 0.637, + "args": { + "External id": 988507,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940486881.323, "dur": 93.857, + "args": { + "External id": 988508,"Record function id": 0, "Sequence number": 10552621, "Fwd thread id": 1, "Ev Idx": 6491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940486884.532, "dur": 81.874, + "args": { + "External id": 988509,"Sequence number": 10552621, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6492 + } + }, + { + "ph": "f", "id": 298, "pid": 2338706, "tid": 2379422, "ts": 6345940486884.532, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940486887.363, "dur": 2.414, + "args": { + "External id": 988510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940486887.781, "dur": 1.553, + "args": { + "External id": 988511,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486888.824, "dur": 0.415, + "args": { + "External id": 988512,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940486890.434, "dur": 32.826, + "args": { + "External id": 988513,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940486924.292, "dur": 5.537, + "args": { + "External id": 988514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940486924.906, "dur": 4.379, + "args": { + "External id": 988515,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486928.245, "dur": 0.929, + "args": { + "External id": 988516,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940486931.254, "dur": 3.791, + "args": { + "External id": 988517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940486932.293, "dur": 2.166, + "args": { + "External id": 988518,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940486933.804, "dur": 0.574, + "args": { + "External id": 988519,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940486935.508, "dur": 30.210, + "args": { + "External id": 988520,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940486979.693, "dur": 50.187, + "args": { + "External id": 988521,"Record function id": 0, "Sequence number": 10552620, "Fwd thread id": 1, "Ev Idx": 6504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940486980.718, "dur": 5.950, + "args": { + "External id": 988522,"Sequence number": 10552620, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6505 + } + }, + { + "ph": "f", "id": 299, "pid": 2338706, "tid": 2379422, "ts": 6345940486980.718, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940486982.265, "dur": 4.270, + "args": { + "External id": 988523,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940486983.186, "dur": 3.226, + "args": { + "External id": 988524,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345940486989.128, "dur": 36.005, + "args": { + "External id": 988525,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940487036.731, "dur": 11.750, + "args": { + "External id": 988526,"Record function id": 0, "Sequence number": 10552619, "Fwd thread id": 1, "Ev Idx": 6509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940487038.018, "dur": 8.012, + "args": { + "External id": 988527,"Sequence number": 10552619, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6510 + } + }, + { + "ph": "f", "id": 300, "pid": 2338706, "tid": 2379422, "ts": 6345940487038.018, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940487041.604, "dur": 4.181, + "args": { + "External id": 988528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940487042.419, "dur": 2.653, + "args": { + "External id": 988529,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940487044.427, "dur": 0.532, + "args": { + "External id": 988530,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940487088.949, "dur": 8.654, + "args": { + "External id": 988531,"Record function id": 0, "Ev Idx": 6514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940487091.439, "dur": 5.389, + "args": { + "External id": 988532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940487093.501, "dur": 2.828, + "args": { + "External id": 988533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940487094.245, "dur": 1.841, + "args": { + "External id": 988534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940487102.812, "dur": 439.298, + "args": { + "External id": 988535,"Record function id": 0, "Sequence number": 10552618, "Fwd thread id": 1, "Ev Idx": 6518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940487104.488, "dur": 400.193, + "args": { + "External id": 988536,"Sequence number": 10552618, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 6519 + } + }, + { + "ph": "f", "id": 301, "pid": 2338706, "tid": 2379422, "ts": 6345940487104.488, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2379422, + "ts": 6345940487129.401, "dur": 38.065, + "args": { + "External id": 988537,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345940487130.850, "dur": 36.391, + "args": { + "External id": 988538,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940487134.147, "dur": 7.636, + "args": { + "External id": 988539,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940487137.328, "dur": 3.838, + "args": { + "External id": 988540,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940487143.218, "dur": 23.412, + "args": { + "External id": 988541,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940487179.419, "dur": 4.629, + "args": { + "External id": 988542,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940487182.433, "dur": 1.476, + "args": { + "External id": 988543,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940487187.774, "dur": 1.654, + "args": { + "External id": 988544,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940487188.517, "dur": 0.808, + "args": { + "External id": 988545,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940487202.246, "dur": 2.696, + "args": { + "External id": 988546,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940487218.536, "dur": 4.388, + "args": { + "External id": 988547,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940487395.554, "dur": 4.356, + "args": { + "External id": 988548,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940487404.390, "dur": 34.109, + "args": { + "External id": 988549,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940487415.426, "dur": 1.155, + "args": { + "External id": 988550,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940487444.867, "dur": 29.328, + "args": { + "External id": 988551,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940487446.599, "dur": 27.389, + "args": { + "External id": 988552,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940487451.003, "dur": 4.577, + "args": { + "External id": 988553,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940487459.005, "dur": 14.424, + "args": { + "External id": 988554,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345940487479.093, "dur": 2.570, + "args": { + "External id": 988555,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940487480.255, "dur": 1.256, + "args": { + "External id": 988556,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940487488.576, "dur": 2.761, + "args": { + "External id": 988557,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940487489.750, "dur": 1.485, + "args": { + "External id": 988558,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940487493.521, "dur": 1.756, + "args": { + "External id": 988559,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940487494.341, "dur": 0.853, + "args": { + "External id": 988560,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940487523.022, "dur": 17.384, + "args": { + "External id": 988561,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940487551.654, "dur": 8.132, + "args": { + "External id": 988562,"Record function id": 0, "Ev Idx": 6545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940487553.557, "dur": 5.494, + "args": { + "External id": 988563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940487555.584, "dur": 2.546, + "args": { + "External id": 988564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940487556.746, "dur": 1.294, + "args": { + "External id": 988565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940487563.310, "dur": 8.755, + "args": { + "External id": 988566,"Record function id": 0, "Sequence number": 10552617, "Fwd thread id": 1, "Ev Idx": 6549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940487564.477, "dur": 4.269, + "args": { + "External id": 988567,"Sequence number": 10552617, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6550 + } + }, + { + "ph": "f", "id": 302, "pid": 2338706, "tid": 2379422, "ts": 6345940487564.477, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940487566.292, "dur": 2.231, + "args": { + "External id": 988568,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940487567.160, "dur": 1.236, + "args": { + "External id": 988569,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940487575.538, "dur": 148.483, + "args": { + "External id": 988570,"Record function id": 0, "Sequence number": 10552616, "Fwd thread id": 1, "Ev Idx": 6553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940487576.321, "dur": 138.568, + "args": { + "External id": 988571,"Sequence number": 10552616, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6554 + } + }, + { + "ph": "f", "id": 303, "pid": 2338706, "tid": 2379422, "ts": 6345940487576.321, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940487583.374, "dur": 5.271, + "args": { + "External id": 988572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940487585.168, "dur": 2.774, + "args": { + "External id": 988573,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940487586.833, "dur": 0.909, + "args": { + "External id": 988574,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940487589.909, "dur": 72.191, + "args": { + "External id": 988575,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940487663.243, "dur": 6.522, + "args": { + "External id": 988576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940487664.158, "dur": 5.067, + "args": { + "External id": 988577,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940487665.747, "dur": 3.341, + "args": { + "External id": 988578,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940487673.128, "dur": 3.580, + "args": { + "External id": 988579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940487674.094, "dur": 2.051, + "args": { + "External id": 988580,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940487675.334, "dur": 0.707, + "args": { + "External id": 988581,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940487677.162, "dur": 37.039, + "args": { + "External id": 988582,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940487729.221, "dur": 10.020, + "args": { + "External id": 988583,"Record function id": 0, "Sequence number": 10552615, "Fwd thread id": 1, "Ev Idx": 6566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940487730.170, "dur": 6.638, + "args": { + "External id": 988584,"Sequence number": 10552615, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6567 + } + }, + { + "ph": "f", "id": 304, "pid": 2338706, "tid": 2379422, "ts": 6345940487730.170, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940487732.301, "dur": 4.345, + "args": { + "External id": 988585,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940487733.198, "dur": 3.279, + "args": { + "External id": 988586,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940487743.465, "dur": 11.078, + "args": { + "External id": 988587,"Record function id": 0, "Sequence number": 10552614, "Fwd thread id": 1, "Ev Idx": 6570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940487744.417, "dur": 7.271, + "args": { + "External id": 988588,"Sequence number": 10552614, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6571 + } + }, + { + "ph": "f", "id": 305, "pid": 2338706, "tid": 2379422, "ts": 6345940487744.417, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940487745.727, "dur": 5.756, + "args": { + "External id": 988589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940487746.748, "dur": 4.202, + "args": { + "External id": 988590,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940487750.362, "dur": 0.491, + "args": { + "External id": 988591,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940487758.229, "dur": 5.644, + "args": { + "External id": 988592,"Record function id": 0, "Ev Idx": 6575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940487760.136, "dur": 3.226, + "args": { + "External id": 988593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940487761.188, "dur": 1.927, + "args": { + "External id": 988594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940487762.049, "dur": 0.957, + "args": { + "External id": 988595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940487767.024, "dur": 7.731, + "args": { + "External id": 988596,"Record function id": 0, "Sequence number": 10552613, "Fwd thread id": 1, "Ev Idx": 6579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940487767.834, "dur": 3.781, + "args": { + "External id": 988597,"Sequence number": 10552613, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6580 + } + }, + { + "ph": "f", "id": 306, "pid": 2338706, "tid": 2379422, "ts": 6345940487767.834, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940487769.392, "dur": 2.069, + "args": { + "External id": 988598,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940487770.261, "dur": 1.060, + "args": { + "External id": 988599,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940487779.181, "dur": 425.644, + "args": { + "External id": 988600,"Record function id": 0, "Sequence number": 10552612, "Fwd thread id": 1, "Ev Idx": 6583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940487780.893, "dur": 400.203, + "args": { + "External id": 988601,"Sequence number": 10552612, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6584 + } + }, + { + "ph": "f", "id": 307, "pid": 2338706, "tid": 2379422, "ts": 6345940487780.893, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940487797.474, "dur": 6.606, + "args": { + "External id": 988602,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940487799.955, "dur": 3.593, + "args": { + "External id": 988603,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940487806.346, "dur": 5.360, + "args": { + "External id": 988604,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940487807.920, "dur": 3.571, + "args": { + "External id": 988605,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940487813.228, "dur": 4.156, + "args": { + "External id": 988606,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940487815.204, "dur": 1.975, + "args": { + "External id": 988607,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940487842.646, "dur": 308.750, + "args": { + "External id": 988608,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 6591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940487924.454, "dur": 3.410, + "args": { + "External id": 988609,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940487929.900, "dur": 2.968, + "args": { + "External id": 988610,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940487934.445, "dur": 2.336, + "args": { + "External id": 988611,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940487938.222, "dur": 2.389, + "args": { + "External id": 988612,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940487984.958, "dur": 2.647, + "args": { + "External id": 988613,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940487986.235, "dur": 1.240, + "args": { + "External id": 988614,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940487991.204, "dur": 47.922, + "args": { + "External id": 988615,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940487996.198, "dur": 1.992, + "args": { + "External id": 988616,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940488042.050, "dur": 2.223, + "args": { + "External id": 988617,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940488043.239, "dur": 0.945, + "args": { + "External id": 988618,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940488045.474, "dur": 55.221, + "args": { + "External id": 988619,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940488047.809, "dur": 3.266, + "args": { + "External id": 988620,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940488166.784, "dur": 4.738, + "args": { + "External id": 988621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940488174.390, "dur": 0.823, + "args": { + "External id": 988622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940488177.349, "dur": 0.753, + "args": { + "External id": 988623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940488216.789, "dur": 235.106, + "args": { + "External id": 988624,"Record function id": 0, "Sequence number": 10552611, "Fwd thread id": 1, "Ev Idx": 6607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940488219.229, "dur": 225.101, + "args": { + "External id": 988625,"Sequence number": 10552611, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6608 + } + }, + { + "ph": "f", "id": 308, "pid": 2338706, "tid": 2379422, "ts": 6345940488219.229, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345940488241.986, "dur": 44.896, + "args": { + "External id": 988626,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940488246.308, "dur": 4.649, + "args": { + "External id": 988627,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940488252.115, "dur": 34.167, + "args": { + "External id": 988628,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 6611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940488298.271, "dur": 5.267, + "args": { + "External id": 988629,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940488300.669, "dur": 2.534, + "args": { + "External id": 988630,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940488459.109, "dur": 166.808, + "args": { + "External id": 988631,"Record function id": 0, "Sequence number": 10552610, "Fwd thread id": 1, "Ev Idx": 6614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940488461.045, "dur": 158.035, + "args": { + "External id": 988632,"Sequence number": 10552610, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6615 + } + }, + { + "ph": "f", "id": 309, "pid": 2338706, "tid": 2379422, "ts": 6345940488461.045, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345940488472.291, "dur": 47.738, + "args": { + "External id": 988633,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940488475.236, "dur": 3.092, + "args": { + "External id": 988634,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940488481.730, "dur": 37.838, + "args": { + "External id": 988635,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 6618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940488527.266, "dur": 5.577, + "args": { + "External id": 988636,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940488529.637, "dur": 2.898, + "args": { + "External id": 988637,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940488632.699, "dur": 16.076, + "args": { + "External id": 988638,"Record function id": 0, "Sequence number": 10552609, "Fwd thread id": 1, "Ev Idx": 6621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940488634.484, "dur": 11.494, + "args": { + "External id": 988639,"Sequence number": 10552609, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6622 + } + }, + { + "ph": "f", "id": 310, "pid": 2338706, "tid": 2379422, "ts": 6345940488634.484, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940488637.596, "dur": 8.130, + "args": { + "External id": 988640,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940488639.044, "dur": 6.504, + "args": { + "External id": 988641,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940488652.798, "dur": 8.252, + "args": { + "External id": 988642,"Record function id": 0, "Sequence number": 10552608, "Fwd thread id": 1, "Ev Idx": 6625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940488654.372, "dur": 3.812, + "args": { + "External id": 988643,"Sequence number": 10552608, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6626 + } + }, + { + "ph": "f", "id": 311, "pid": 2338706, "tid": 2379422, "ts": 6345940488654.372, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940488656.141, "dur": 1.905, + "args": { + "External id": 988644,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940488657.067, "dur": 0.836, + "args": { + "External id": 988645,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940488664.512, "dur": 10.821, + "args": { + "External id": 988646,"Record function id": 0, "Sequence number": 10552607, "Fwd thread id": 1, "Ev Idx": 6629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940488665.882, "dur": 6.643, + "args": { + "External id": 988647,"Sequence number": 10552607, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6630 + } + }, + { + "ph": "f", "id": 312, "pid": 2338706, "tid": 2379422, "ts": 6345940488665.882, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940488667.808, "dur": 4.557, + "args": { + "External id": 988648,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940488671.324, "dur": 0.915, + "args": { + "External id": 988649,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940488679.657, "dur": 6.702, + "args": { + "External id": 988650,"Record function id": 0, "Sequence number": 10552606, "Fwd thread id": 1, "Ev Idx": 6633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940488680.857, "dur": 3.288, + "args": { + "External id": 988651,"Sequence number": 10552606, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6634 + } + }, + { + "ph": "f", "id": 313, "pid": 2338706, "tid": 2379422, "ts": 6345940488680.857, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940488682.449, "dur": 1.556, + "args": { + "External id": 988652,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940488683.097, "dur": 0.819, + "args": { + "External id": 988653,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940488690.044, "dur": 167.991, + "args": { + "External id": 988654,"Record function id": 0, "Sequence number": 10552605, "Fwd thread id": 1, "Ev Idx": 6637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940488690.964, "dur": 158.767, + "args": { + "External id": 988655,"Sequence number": 10552605, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6638 + } + }, + { + "ph": "f", "id": 314, "pid": 2338706, "tid": 2379422, "ts": 6345940488690.964, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940488695.192, "dur": 8.958, + "args": { + "External id": 988656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940488699.530, "dur": 3.915, + "args": { + "External id": 988657,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940488701.504, "dur": 1.663, + "args": { + "External id": 988658,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940488705.897, "dur": 73.329, + "args": { + "External id": 988659,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940488780.433, "dur": 9.418, + "args": { + "External id": 988660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940488781.410, "dur": 7.674, + "args": { + "External id": 988661,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940488786.919, "dur": 1.975, + "args": { + "External id": 988662,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940488794.066, "dur": 3.881, + "args": { + "External id": 988663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940488795.491, "dur": 1.925, + "args": { + "External id": 988664,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940488796.984, "dur": 0.298, + "args": { + "External id": 988665,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940488798.598, "dur": 50.201, + "args": { + "External id": 988666,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940488863.411, "dur": 7.808, + "args": { + "External id": 988667,"Record function id": 0, "Sequence number": 10552604, "Fwd thread id": 1, "Ev Idx": 6650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940488864.375, "dur": 4.191, + "args": { + "External id": 988668,"Sequence number": 10552604, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6651 + } + }, + { + "ph": "f", "id": 315, "pid": 2338706, "tid": 2379422, "ts": 6345940488864.375, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940488866.115, "dur": 2.326, + "args": { + "External id": 988669,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940488867.065, "dur": 1.252, + "args": { + "External id": 988670,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940488874.901, "dur": 101.952, + "args": { + "External id": 988671,"Record function id": 0, "Sequence number": 10552603, "Fwd thread id": 1, "Ev Idx": 6654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940488875.716, "dur": 97.663, + "args": { + "External id": 988672,"Sequence number": 10552603, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6655 + } + }, + { + "ph": "f", "id": 316, "pid": 2338706, "tid": 2379422, "ts": 6345940488875.716, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940488969.005, "dur": 4.087, + "args": { + "External id": 988673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940488970.242, "dur": 2.304, + "args": { + "External id": 988674,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940488971.961, "dur": 0.459, + "args": { + "External id": 988675,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940488983.052, "dur": 9.486, + "args": { + "External id": 988676,"Record function id": 0, "Ev Idx": 6659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940488984.620, "dur": 7.119, + "args": { + "External id": 988677,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940488987.271, "dur": 4.043, + "args": { + "External id": 988678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940488988.529, "dur": 2.692, + "args": { + "External id": 988679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940488995.849, "dur": 8.072, + "args": { + "External id": 988680,"Record function id": 0, "Sequence number": 10552602, "Fwd thread id": 1, "Ev Idx": 6663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940488997.242, "dur": 3.119, + "args": { + "External id": 988681,"Sequence number": 10552602, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6664 + } + }, + { + "ph": "f", "id": 317, "pid": 2338706, "tid": 2379422, "ts": 6345940488997.242, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940488998.531, "dur": 1.672, + "args": { + "External id": 988682,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940488999.311, "dur": 0.794, + "args": { + "External id": 988683,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940489026.409, "dur": 159.202, + "args": { + "External id": 988684,"Record function id": 0, "Sequence number": 10552601, "Fwd thread id": 1, "Ev Idx": 6667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940489030.918, "dur": 143.280, + "args": { + "External id": 988685,"Sequence number": 10552601, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6668 + } + }, + { + "ph": "f", "id": 318, "pid": 2338706, "tid": 2379422, "ts": 6345940489030.918, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940489034.951, "dur": 3.747, + "args": { + "External id": 988686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940489035.637, "dur": 2.466, + "args": { + "External id": 988687,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940489037.078, "dur": 0.778, + "args": { + "External id": 988688,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940489039.650, "dur": 76.351, + "args": { + "External id": 988689,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940489118.444, "dur": 6.761, + "args": { + "External id": 988690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940489119.232, "dur": 5.176, + "args": { + "External id": 988691,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940489123.529, "dur": 0.767, + "args": { + "External id": 988692,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940489126.730, "dur": 5.372, + "args": { + "External id": 988693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940489127.690, "dur": 3.935, + "args": { + "External id": 988694,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940489128.906, "dur": 2.651, + "args": { + "External id": 988695,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940489132.580, "dur": 40.779, + "args": { + "External id": 988696,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940489193.089, "dur": 37.728, + "args": { + "External id": 988697,"Record function id": 0, "Sequence number": 10552600, "Fwd thread id": 1, "Ev Idx": 6680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940489194.219, "dur": 6.717, + "args": { + "External id": 988698,"Sequence number": 10552600, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6681 + } + }, + { + "ph": "f", "id": 319, "pid": 2338706, "tid": 2379422, "ts": 6345940489194.219, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940489196.011, "dur": 4.762, + "args": { + "External id": 988699,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940489196.915, "dur": 3.697, + "args": { + "External id": 988700,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345940489204.826, "dur": 23.106, + "args": { + "External id": 988701,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940489234.897, "dur": 11.458, + "args": { + "External id": 988702,"Record function id": 0, "Sequence number": 10552599, "Fwd thread id": 1, "Ev Idx": 6685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940489237.966, "dur": 5.411, + "args": { + "External id": 988703,"Sequence number": 10552599, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6686 + } + }, + { + "ph": "f", "id": 320, "pid": 2338706, "tid": 2379422, "ts": 6345940489237.966, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940489239.285, "dur": 3.850, + "args": { + "External id": 988704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940489240.066, "dur": 2.285, + "args": { + "External id": 988705,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940489241.519, "dur": 0.747, + "args": { + "External id": 988706,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940489250.722, "dur": 6.609, + "args": { + "External id": 988707,"Record function id": 0, "Ev Idx": 6690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940489252.283, "dur": 4.375, + "args": { + "External id": 988708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940489253.760, "dur": 2.603, + "args": { + "External id": 988709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940489254.445, "dur": 1.773, + "args": { + "External id": 988710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940489261.091, "dur": 11.769, + "args": { + "External id": 988711,"Record function id": 0, "Sequence number": 10552598, "Fwd thread id": 1, "Ev Idx": 6694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940489262.825, "dur": 6.577, + "args": { + "External id": 988712,"Sequence number": 10552598, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6695 + } + }, + { + "ph": "f", "id": 321, "pid": 2338706, "tid": 2379422, "ts": 6345940489262.825, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940489264.509, "dur": 4.736, + "args": { + "External id": 988713,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940489268.095, "dur": 1.027, + "args": { + "External id": 988714,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940489276.399, "dur": 103.690, + "args": { + "External id": 988715,"Record function id": 0, "Sequence number": 10552597, "Fwd thread id": 1, "Ev Idx": 6698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940489277.190, "dur": 93.205, + "args": { + "External id": 988716,"Sequence number": 10552597, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6699 + } + }, + { + "ph": "f", "id": 322, "pid": 2338706, "tid": 2379422, "ts": 6345940489277.190, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940489280.226, "dur": 2.666, + "args": { + "External id": 988717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940489280.752, "dur": 1.665, + "args": { + "External id": 988718,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940489281.709, "dur": 0.613, + "args": { + "External id": 988719,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940489283.667, "dur": 40.932, + "args": { + "External id": 988720,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940489327.635, "dur": 3.288, + "args": { + "External id": 988721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940489328.045, "dur": 2.304, + "args": { + "External id": 988722,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940489329.273, "dur": 0.962, + "args": { + "External id": 988723,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940489331.970, "dur": 3.212, + "args": { + "External id": 988724,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940489332.826, "dur": 1.794, + "args": { + "External id": 988725,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940489334.195, "dur": 0.356, + "args": { + "External id": 988726,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940489337.434, "dur": 32.157, + "args": { + "External id": 988727,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940489384.773, "dur": 25.614, + "args": { + "External id": 988728,"Record function id": 0, "Sequence number": 10552596, "Fwd thread id": 1, "Ev Idx": 6711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940489385.746, "dur": 3.731, + "args": { + "External id": 988729,"Sequence number": 10552596, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6712 + } + }, + { + "ph": "f", "id": 323, "pid": 2338706, "tid": 2379422, "ts": 6345940489385.746, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940489387.539, "dur": 1.800, + "args": { + "External id": 988730,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940489388.246, "dur": 0.929, + "args": { + "External id": 988731,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940489392.070, "dur": 16.136, + "args": { + "External id": 988732,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940489414.181, "dur": 7.787, + "args": { + "External id": 988733,"Record function id": 0, "Sequence number": 10552595, "Fwd thread id": 1, "Ev Idx": 6716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940489415.063, "dur": 4.517, + "args": { + "External id": 988734,"Sequence number": 10552595, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6717 + } + }, + { + "ph": "f", "id": 324, "pid": 2338706, "tid": 2379422, "ts": 6345940489415.063, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940489415.980, "dur": 3.411, + "args": { + "External id": 988735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940489416.648, "dur": 2.240, + "args": { + "External id": 988736,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940489418.106, "dur": 0.655, + "args": { + "External id": 988737,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940489425.985, "dur": 4.755, + "args": { + "External id": 988738,"Record function id": 0, "Ev Idx": 6721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940489427.577, "dur": 2.702, + "args": { + "External id": 988739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940489428.307, "dur": 1.426, + "args": { + "External id": 988740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940489428.719, "dur": 0.911, + "args": { + "External id": 988741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940489435.266, "dur": 363.568, + "args": { + "External id": 988742,"Record function id": 0, "Sequence number": 10552594, "Fwd thread id": 1, "Ev Idx": 6725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940489436.706, "dur": 330.428, + "args": { + "External id": 988743,"Sequence number": 10552594, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6726 + } + }, + { + "ph": "f", "id": 325, "pid": 2338706, "tid": 2379422, "ts": 6345940489436.706, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940489475.783, "dur": 4.233, + "args": { + "External id": 988744,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940489478.485, "dur": 1.398, + "args": { + "External id": 988745,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940489494.652, "dur": 6.308, + "args": { + "External id": 988746,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940489511.655, "dur": 2.018, + "args": { + "External id": 988747,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940489668.810, "dur": 1.950, + "args": { + "External id": 988748,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940489674.762, "dur": 33.184, + "args": { + "External id": 988749,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940489685.400, "dur": 0.927, + "args": { + "External id": 988750,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940489713.829, "dur": 30.790, + "args": { + "External id": 988751,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940489715.634, "dur": 28.776, + "args": { + "External id": 988752,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940489722.070, "dur": 4.309, + "args": { + "External id": 988753,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940489728.411, "dur": 15.503, + "args": { + "External id": 988754,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345940489748.891, "dur": 2.683, + "args": { + "External id": 988755,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940489750.418, "dur": 1.015, + "args": { + "External id": 988756,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940489757.554, "dur": 2.112, + "args": { + "External id": 988757,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940489758.742, "dur": 0.819, + "args": { + "External id": 988758,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940489779.928, "dur": 15.724, + "args": { + "External id": 988759,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940489807.754, "dur": 7.771, + "args": { + "External id": 988760,"Record function id": 0, "Ev Idx": 6743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940489809.887, "dur": 4.896, + "args": { + "External id": 988761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940489811.514, "dur": 2.340, + "args": { + "External id": 988762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940489812.547, "dur": 1.212, + "args": { + "External id": 988763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940489819.769, "dur": 6.894, + "args": { + "External id": 988764,"Record function id": 0, "Sequence number": 10552593, "Fwd thread id": 1, "Ev Idx": 6747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940489821.189, "dur": 1.353, + "args": { + "External id": 988765,"Sequence number": 10552593, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6748 + } + }, + { + "ph": "f", "id": 326, "pid": 2338706, "tid": 2379422, "ts": 6345940489821.189, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940489830.551, "dur": 502.403, + "args": { + "External id": 988766,"Record function id": 0, "Sequence number": 10552592, "Fwd thread id": 1, "Ev Idx": 6749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940489846.101, "dur": 471.414, + "args": { + "External id": 988767,"Sequence number": 10552592, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6750 + } + }, + { + "ph": "f", "id": 327, "pid": 2338706, "tid": 2379422, "ts": 6345940489846.101, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940489879.331, "dur": 8.718, + "args": { + "External id": 988768,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940489884.534, "dur": 3.240, + "args": { + "External id": 988769,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 6752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940489891.275, "dur": 4.781, + "args": { + "External id": 988770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940489892.614, "dur": 2.676, + "args": { + "External id": 988771,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940489894.443, "dur": 0.711, + "args": { + "External id": 988772,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2379422, + "ts": 6345940489899.955, "dur": 90.679, + "args": { + "External id": 988773,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 6756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940489903.395, "dur": 7.572, + "args": { + "External id": 988774,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 6757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940489903.892, "dur": 6.563, + "args": { + "External id": 988775,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940489905.179, "dur": 5.120, + "args": { + "External id": 988776,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2379422, + "ts": 6345940489912.352, "dur": 77.753, + "args": { + "External id": 988777,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940489914.042, "dur": 75.017, + "args": { + "External id": 988778,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345940489995.413, "dur": 3.268, + "args": { + "External id": 988779,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 6762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940489997.129, "dur": 1.394, + "args": { + "External id": 988780,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940490087.181, "dur": 5.215, + "args": { + "External id": 988781,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940490094.448, "dur": 2.975, + "args": { + "External id": 988782,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940490098.988, "dur": 2.456, + "args": { + "External id": 988783,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940490138.080, "dur": 3.158, + "args": { + "External id": 988784,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940490139.111, "dur": 1.919, + "args": { + "External id": 988785,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338706, "tid": 2379422, + "ts": 6345940490166.037, "dur": 129.066, + "args": { + "External id": 988786,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 6769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2379422, + "ts": 6345940490172.584, "dur": 6.337, + "args": { + "External id": 988787,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940490176.942, "dur": 1.119, + "args": { + "External id": 988788,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940490180.451, "dur": 7.160, + "args": { + "External id": 988789,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 6772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940490183.748, "dur": 3.089, + "args": { + "External id": 988790,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 6773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2379422, + "ts": 6345940490189.409, "dur": 2.743, + "args": { + "External id": 988791,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940490191.362, "dur": 0.452, + "args": { + "External id": 988792,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940490193.133, "dur": 2.850, + "args": { + "External id": 988793,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940490194.929, "dur": 0.586, + "args": { + "External id": 988794,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 6777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940490199.944, "dur": 4.006, + "args": { + "External id": 988795,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 6778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940490203.098, "dur": 0.524, + "args": { + "External id": 988796,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 6779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940490204.888, "dur": 7.820, + "args": { + "External id": 988797,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 6780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940490209.904, "dur": 2.602, + "args": { + "External id": 988798,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 6781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940490216.209, "dur": 2.571, + "args": { + "External id": 988799,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 6782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940490218.074, "dur": 0.407, + "args": { + "External id": 988800,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 6783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940490219.491, "dur": 2.669, + "args": { + "External id": 988801,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940490220.535, "dur": 1.527, + "args": { + "External id": 988802,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345940490223.122, "dur": 58.330, + "args": { + "External id": 988803,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 6786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940490283.634, "dur": 3.208, + "args": { + "External id": 988804,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 6787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940490287.668, "dur": 3.386, + "args": { + "External id": 988805,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 6788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940490289.698, "dur": 0.677, + "args": { + "External id": 988806,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 6789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940490293.192, "dur": 0.857, + "args": { + "External id": 988807,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 6790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940490345.026, "dur": 9.712, + "args": { + "External id": 988808,"Record function id": 0, "Ev Idx": 6791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940490347.331, "dur": 6.626, + "args": { + "External id": 988809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940490349.554, "dur": 3.549, + "args": { + "External id": 988810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940490350.661, "dur": 2.321, + "args": { + "External id": 988811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940490358.881, "dur": 7.905, + "args": { + "External id": 988812,"Record function id": 0, "Sequence number": 10552591, "Fwd thread id": 1, "Ev Idx": 6795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940490360.138, "dur": 3.985, + "args": { + "External id": 988813,"Sequence number": 10552591, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6796 + } + }, + { + "ph": "f", "id": 328, "pid": 2338706, "tid": 2379422, "ts": 6345940490360.138, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940490361.732, "dur": 2.102, + "args": { + "External id": 988814,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940490362.846, "dur": 0.856, + "args": { + "External id": 988815,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940490370.760, "dur": 115.894, + "args": { + "External id": 988816,"Record function id": 0, "Sequence number": 10552590, "Fwd thread id": 1, "Ev Idx": 6799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940490371.686, "dur": 108.292, + "args": { + "External id": 988817,"Sequence number": 10552590, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6800 + } + }, + { + "ph": "f", "id": 329, "pid": 2338706, "tid": 2379422, "ts": 6345940490371.686, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940490376.662, "dur": 6.835, + "args": { + "External id": 988818,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940490378.255, "dur": 4.727, + "args": { + "External id": 988819,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940490382.256, "dur": 0.564, + "args": { + "External id": 988820,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940490384.878, "dur": 43.600, + "args": { + "External id": 988821,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940490429.928, "dur": 4.016, + "args": { + "External id": 988822,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940490430.751, "dur": 2.578, + "args": { + "External id": 988823,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940490432.301, "dur": 0.868, + "args": { + "External id": 988824,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940490435.680, "dur": 5.631, + "args": { + "External id": 988825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940490438.766, "dur": 1.922, + "args": { + "External id": 988826,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940490440.147, "dur": 0.442, + "args": { + "External id": 988827,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940490442.242, "dur": 36.954, + "args": { + "External id": 988828,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940490491.860, "dur": 7.395, + "args": { + "External id": 988829,"Record function id": 0, "Sequence number": 10552589, "Fwd thread id": 1, "Ev Idx": 6812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940490493.076, "dur": 4.687, + "args": { + "External id": 988830,"Sequence number": 10552589, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6813 + } + }, + { + "ph": "f", "id": 330, "pid": 2338706, "tid": 2379422, "ts": 6345940490493.076, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940490495.161, "dur": 2.457, + "args": { + "External id": 988831,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940490496.341, "dur": 1.122, + "args": { + "External id": 988832,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940490502.580, "dur": 9.612, + "args": { + "External id": 988833,"Record function id": 0, "Sequence number": 10552588, "Fwd thread id": 1, "Ev Idx": 6816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940490503.587, "dur": 6.183, + "args": { + "External id": 988834,"Sequence number": 10552588, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6817 + } + }, + { + "ph": "f", "id": 331, "pid": 2338706, "tid": 2379422, "ts": 6345940490503.587, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940490504.487, "dur": 5.079, + "args": { + "External id": 988835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940490507.135, "dur": 1.959, + "args": { + "External id": 988836,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940490508.398, "dur": 0.537, + "args": { + "External id": 988837,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940490516.529, "dur": 5.111, + "args": { + "External id": 988838,"Record function id": 0, "Ev Idx": 6821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940490517.989, "dur": 3.167, + "args": { + "External id": 988839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940490519.116, "dur": 1.807, + "args": { + "External id": 988840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940490519.597, "dur": 1.212, + "args": { + "External id": 988841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940490525.156, "dur": 6.407, + "args": { + "External id": 988842,"Record function id": 0, "Sequence number": 10552587, "Fwd thread id": 1, "Ev Idx": 6825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940490526.236, "dur": 3.131, + "args": { + "External id": 988843,"Sequence number": 10552587, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6826 + } + }, + { + "ph": "f", "id": 332, "pid": 2338706, "tid": 2379422, "ts": 6345940490526.236, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940490527.707, "dur": 1.512, + "args": { + "External id": 988844,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940490528.434, "dur": 0.635, + "args": { + "External id": 988845,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940490534.792, "dur": 99.269, + "args": { + "External id": 988846,"Record function id": 0, "Sequence number": 10552586, "Fwd thread id": 1, "Ev Idx": 6829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940490537.897, "dur": 86.894, + "args": { + "External id": 988847,"Sequence number": 10552586, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6830 + } + }, + { + "ph": "f", "id": 333, "pid": 2338706, "tid": 2379422, "ts": 6345940490537.897, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940490541.024, "dur": 2.578, + "args": { + "External id": 988848,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940490541.586, "dur": 1.556, + "args": { + "External id": 988849,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940490542.536, "dur": 0.511, + "args": { + "External id": 988850,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940490544.191, "dur": 33.045, + "args": { + "External id": 988851,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940490578.316, "dur": 5.714, + "args": { + "External id": 988852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940490579.225, "dur": 4.297, + "args": { + "External id": 988853,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940490582.724, "dur": 0.681, + "args": { + "External id": 988854,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940490585.364, "dur": 6.263, + "args": { + "External id": 988855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940490586.903, "dur": 4.304, + "args": { + "External id": 988856,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940490588.670, "dur": 2.448, + "args": { + "External id": 988857,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940490592.270, "dur": 31.670, + "args": { + "External id": 988858,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940490638.457, "dur": 32.400, + "args": { + "External id": 988859,"Record function id": 0, "Sequence number": 10552585, "Fwd thread id": 1, "Ev Idx": 6842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940490639.700, "dur": 4.791, + "args": { + "External id": 988860,"Sequence number": 10552585, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6843 + } + }, + { + "ph": "f", "id": 334, "pid": 2338706, "tid": 2379422, "ts": 6345940490639.700, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940490641.705, "dur": 2.645, + "args": { + "External id": 988861,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940490642.911, "dur": 1.313, + "args": { + "External id": 988862,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345940490646.949, "dur": 21.045, + "args": { + "External id": 988863,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940490675.223, "dur": 11.010, + "args": { + "External id": 988864,"Record function id": 0, "Sequence number": 10552584, "Fwd thread id": 1, "Ev Idx": 6847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940490678.445, "dur": 4.990, + "args": { + "External id": 988865,"Sequence number": 10552584, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6848 + } + }, + { + "ph": "f", "id": 335, "pid": 2338706, "tid": 2379422, "ts": 6345940490678.445, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940490679.336, "dur": 3.906, + "args": { + "External id": 988866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940490680.214, "dur": 2.472, + "args": { + "External id": 988867,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940490681.730, "dur": 0.837, + "args": { + "External id": 988868,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940490690.145, "dur": 5.529, + "args": { + "External id": 988869,"Record function id": 0, "Ev Idx": 6852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940490691.773, "dur": 3.386, + "args": { + "External id": 988870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940490692.589, "dur": 1.847, + "args": { + "External id": 988871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940490693.237, "dur": 1.087, + "args": { + "External id": 988872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940490699.937, "dur": 519.099, + "args": { + "External id": 988873,"Record function id": 0, "Sequence number": 10552583, "Fwd thread id": 1, "Ev Idx": 6856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940490701.887, "dur": 476.597, + "args": { + "External id": 988874,"Sequence number": 10552583, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 6857 + } + }, + { + "ph": "f", "id": 336, "pid": 2338706, "tid": 2379422, "ts": 6345940490701.887, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2379422, + "ts": 6345940490722.621, "dur": 55.534, + "args": { + "External id": 988875,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345940490724.260, "dur": 53.617, + "args": { + "External id": 988876,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940490727.135, "dur": 26.843, + "args": { + "External id": 988877,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940490750.063, "dur": 3.193, + "args": { + "External id": 988878,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940490755.473, "dur": 21.734, + "args": { + "External id": 988879,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940490792.614, "dur": 2.655, + "args": { + "External id": 988880,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940490793.550, "dur": 1.575, + "args": { + "External id": 988881,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940490799.096, "dur": 3.515, + "args": { + "External id": 988882,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940490799.839, "dur": 2.677, + "args": { + "External id": 988883,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940490817.716, "dur": 2.574, + "args": { + "External id": 988884,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940490831.490, "dur": 2.410, + "args": { + "External id": 988885,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940490996.544, "dur": 3.587, + "args": { + "External id": 988886,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940491004.562, "dur": 89.639, + "args": { + "External id": 988887,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940491032.945, "dur": 1.005, + "args": { + "External id": 988888,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940491103.114, "dur": 36.562, + "args": { + "External id": 988889,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940491105.137, "dur": 34.325, + "args": { + "External id": 988890,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940491110.611, "dur": 5.877, + "args": { + "External id": 988891,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940491120.851, "dur": 18.128, + "args": { + "External id": 988892,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345940491144.698, "dur": 3.253, + "args": { + "External id": 988893,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940491146.350, "dur": 1.492, + "args": { + "External id": 988894,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940491156.094, "dur": 2.949, + "args": { + "External id": 988895,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940491157.673, "dur": 1.260, + "args": { + "External id": 988896,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940491161.540, "dur": 4.355, + "args": { + "External id": 988897,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940491162.552, "dur": 3.251, + "args": { + "External id": 988898,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940491197.226, "dur": 20.244, + "args": { + "External id": 988899,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940491233.536, "dur": 9.956, + "args": { + "External id": 988900,"Record function id": 0, "Ev Idx": 6883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940491235.892, "dur": 6.768, + "args": { + "External id": 988901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940491238.319, "dur": 3.373, + "args": { + "External id": 988902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940491239.488, "dur": 2.122, + "args": { + "External id": 988903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940491247.452, "dur": 9.092, + "args": { + "External id": 988904,"Record function id": 0, "Sequence number": 10552582, "Fwd thread id": 1, "Ev Idx": 6887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940491248.635, "dur": 4.617, + "args": { + "External id": 988905,"Sequence number": 10552582, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6888 + } + }, + { + "ph": "f", "id": 337, "pid": 2338706, "tid": 2379422, "ts": 6345940491248.635, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940491250.817, "dur": 2.190, + "args": { + "External id": 988906,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940491251.658, "dur": 1.193, + "args": { + "External id": 988907,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940491260.308, "dur": 145.320, + "args": { + "External id": 988908,"Record function id": 0, "Sequence number": 10552581, "Fwd thread id": 1, "Ev Idx": 6891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940491261.718, "dur": 135.716, + "args": { + "External id": 988909,"Sequence number": 10552581, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6892 + } + }, + { + "ph": "f", "id": 338, "pid": 2338706, "tid": 2379422, "ts": 6345940491261.718, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940491269.118, "dur": 5.728, + "args": { + "External id": 988910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940491270.565, "dur": 3.660, + "args": { + "External id": 988911,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940491272.768, "dur": 1.217, + "args": { + "External id": 988912,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940491276.267, "dur": 70.541, + "args": { + "External id": 988913,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940491348.161, "dur": 6.383, + "args": { + "External id": 988914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940491348.871, "dur": 4.985, + "args": { + "External id": 988915,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940491353.015, "dur": 0.720, + "args": { + "External id": 988916,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940491356.000, "dur": 3.956, + "args": { + "External id": 988917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940491357.212, "dur": 2.181, + "args": { + "External id": 988918,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940491358.835, "dur": 0.484, + "args": { + "External id": 988919,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940491361.155, "dur": 35.550, + "args": { + "External id": 988920,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940491410.657, "dur": 12.164, + "args": { + "External id": 988921,"Record function id": 0, "Sequence number": 10552580, "Fwd thread id": 1, "Ev Idx": 6904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940491411.992, "dur": 8.232, + "args": { + "External id": 988922,"Sequence number": 10552580, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6905 + } + }, + { + "ph": "f", "id": 339, "pid": 2338706, "tid": 2379422, "ts": 6345940491411.992, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940491413.490, "dur": 6.553, + "args": { + "External id": 988923,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940491414.750, "dur": 5.123, + "args": { + "External id": 988924,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940491426.241, "dur": 9.455, + "args": { + "External id": 988925,"Record function id": 0, "Sequence number": 10552579, "Fwd thread id": 1, "Ev Idx": 6908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940491427.174, "dur": 6.638, + "args": { + "External id": 988926,"Sequence number": 10552579, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6909 + } + }, + { + "ph": "f", "id": 340, "pid": 2338706, "tid": 2379422, "ts": 6345940491427.174, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940491428.134, "dur": 5.476, + "args": { + "External id": 988927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940491428.759, "dur": 4.300, + "args": { + "External id": 988928,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940491432.259, "dur": 0.649, + "args": { + "External id": 988929,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940491439.686, "dur": 5.530, + "args": { + "External id": 988930,"Record function id": 0, "Ev Idx": 6913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940491441.511, "dur": 3.189, + "args": { + "External id": 988931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940491442.630, "dur": 1.643, + "args": { + "External id": 988932,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940491443.167, "dur": 0.999, + "args": { + "External id": 988933,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940491448.832, "dur": 7.120, + "args": { + "External id": 988934,"Record function id": 0, "Sequence number": 10552578, "Fwd thread id": 1, "Ev Idx": 6917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940491449.667, "dur": 4.183, + "args": { + "External id": 988935,"Sequence number": 10552578, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6918 + } + }, + { + "ph": "f", "id": 341, "pid": 2338706, "tid": 2379422, "ts": 6345940491449.667, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940491451.420, "dur": 2.287, + "args": { + "External id": 988936,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940491452.537, "dur": 1.035, + "args": { + "External id": 988937,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940491460.592, "dur": 354.860, + "args": { + "External id": 988938,"Record function id": 0, "Sequence number": 10552577, "Fwd thread id": 1, "Ev Idx": 6921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940491462.262, "dur": 330.051, + "args": { + "External id": 988939,"Sequence number": 10552577, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6922 + } + }, + { + "ph": "f", "id": 342, "pid": 2338706, "tid": 2379422, "ts": 6345940491462.262, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940491479.575, "dur": 6.627, + "args": { + "External id": 988940,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940491482.223, "dur": 3.511, + "args": { + "External id": 988941,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940491488.416, "dur": 4.076, + "args": { + "External id": 988942,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940491490.376, "dur": 1.896, + "args": { + "External id": 988943,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940491493.894, "dur": 4.976, + "args": { + "External id": 988944,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940491496.086, "dur": 2.587, + "args": { + "External id": 988945,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940491524.372, "dur": 242.646, + "args": { + "External id": 988946,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 6929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940491610.118, "dur": 3.094, + "args": { + "External id": 988947,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940491615.726, "dur": 4.921, + "args": { + "External id": 988948,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940491621.922, "dur": 2.492, + "args": { + "External id": 988949,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940491625.696, "dur": 2.139, + "args": { + "External id": 988950,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940491670.641, "dur": 3.426, + "args": { + "External id": 988951,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940491672.416, "dur": 1.553, + "args": { + "External id": 988952,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940491676.028, "dur": 26.948, + "args": { + "External id": 988953,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940491681.296, "dur": 2.672, + "args": { + "External id": 988954,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940491707.160, "dur": 1.934, + "args": { + "External id": 988955,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940491708.396, "dur": 0.603, + "args": { + "External id": 988956,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940491710.374, "dur": 13.900, + "args": { + "External id": 988957,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940491713.023, "dur": 0.357, + "args": { + "External id": 988958,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940491779.557, "dur": 3.331, + "args": { + "External id": 988959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940491786.212, "dur": 0.828, + "args": { + "External id": 988960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940491788.912, "dur": 0.506, + "args": { + "External id": 988961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940491822.979, "dur": 300.398, + "args": { + "External id": 988962,"Record function id": 0, "Sequence number": 10552576, "Fwd thread id": 1, "Ev Idx": 6945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940491825.167, "dur": 287.458, + "args": { + "External id": 988963,"Sequence number": 10552576, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6946 + } + }, + { + "ph": "f", "id": 343, "pid": 2338706, "tid": 2379422, "ts": 6345940491825.167, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345940491844.932, "dur": 44.908, + "args": { + "External id": 988964,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940491847.610, "dur": 3.656, + "args": { + "External id": 988965,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940491852.598, "dur": 36.581, + "args": { + "External id": 988966,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 6949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940491899.774, "dur": 5.606, + "args": { + "External id": 988967,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940491902.557, "dur": 2.516, + "args": { + "External id": 988968,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940492134.983, "dur": 218.564, + "args": { + "External id": 988969,"Record function id": 0, "Sequence number": 10552575, "Fwd thread id": 1, "Ev Idx": 6952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940492137.414, "dur": 209.034, + "args": { + "External id": 988970,"Sequence number": 10552575, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6953 + } + }, + { + "ph": "f", "id": 344, "pid": 2338706, "tid": 2379422, "ts": 6345940492137.414, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345940492151.784, "dur": 70.014, + "args": { + "External id": 988971,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940492160.347, "dur": 5.944, + "args": { + "External id": 988972,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940492180.814, "dur": 40.237, + "args": { + "External id": 988973,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 6956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940492231.246, "dur": 5.996, + "args": { + "External id": 988974,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940492233.631, "dur": 3.299, + "args": { + "External id": 988975,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492360.517, "dur": 16.262, + "args": { + "External id": 988976,"Record function id": 0, "Sequence number": 10552574, "Fwd thread id": 1, "Ev Idx": 6959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492362.307, "dur": 11.460, + "args": { + "External id": 988977,"Sequence number": 10552574, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6960 + } + }, + { + "ph": "f", "id": 345, "pid": 2338706, "tid": 2379422, "ts": 6345940492362.307, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940492365.162, "dur": 8.316, + "args": { + "External id": 988978,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940492366.942, "dur": 6.326, + "args": { + "External id": 988979,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492380.871, "dur": 8.416, + "args": { + "External id": 988980,"Record function id": 0, "Sequence number": 10552573, "Fwd thread id": 1, "Ev Idx": 6963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492382.396, "dur": 4.363, + "args": { + "External id": 988981,"Sequence number": 10552573, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6964 + } + }, + { + "ph": "f", "id": 346, "pid": 2338706, "tid": 2379422, "ts": 6345940492382.396, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940492384.243, "dur": 2.355, + "args": { + "External id": 988982,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940492385.618, "dur": 0.876, + "args": { + "External id": 988983,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492392.708, "dur": 8.457, + "args": { + "External id": 988984,"Record function id": 0, "Sequence number": 10552572, "Fwd thread id": 1, "Ev Idx": 6967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492393.349, "dur": 5.679, + "args": { + "External id": 988985,"Sequence number": 10552572, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6968 + } + }, + { + "ph": "f", "id": 347, "pid": 2338706, "tid": 2379422, "ts": 6345940492393.349, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940492394.961, "dur": 3.915, + "args": { + "External id": 988986,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940492397.885, "dur": 0.805, + "args": { + "External id": 988987,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492405.026, "dur": 7.755, + "args": { + "External id": 988988,"Record function id": 0, "Sequence number": 10552571, "Fwd thread id": 1, "Ev Idx": 6971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492406.492, "dur": 3.320, + "args": { + "External id": 988989,"Sequence number": 10552571, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6972 + } + }, + { + "ph": "f", "id": 348, "pid": 2338706, "tid": 2379422, "ts": 6345940492406.492, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940492407.957, "dur": 1.709, + "args": { + "External id": 988990,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940492408.756, "dur": 0.823, + "args": { + "External id": 988991,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492416.535, "dur": 180.663, + "args": { + "External id": 988992,"Record function id": 0, "Sequence number": 10552570, "Fwd thread id": 1, "Ev Idx": 6975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492417.307, "dur": 169.592, + "args": { + "External id": 988993,"Sequence number": 10552570, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6976 + } + }, + { + "ph": "f", "id": 349, "pid": 2338706, "tid": 2379422, "ts": 6345940492417.307, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940492422.926, "dur": 9.796, + "args": { + "External id": 988994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940492427.396, "dur": 4.623, + "args": { + "External id": 988995,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940492430.141, "dur": 1.615, + "args": { + "External id": 988996,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940492434.548, "dur": 72.161, + "args": { + "External id": 988997,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940492508.235, "dur": 7.088, + "args": { + "External id": 988998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940492509.102, "dur": 5.510, + "args": { + "External id": 988999,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940492510.857, "dur": 3.590, + "args": { + "External id": 989000,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940492517.128, "dur": 5.877, + "args": { + "External id": 989001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940492520.628, "dur": 1.707, + "args": { + "External id": 989002,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940492521.801, "dur": 0.465, + "args": { + "External id": 989003,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940492523.935, "dur": 62.305, + "args": { + "External id": 989004,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492602.270, "dur": 7.433, + "args": { + "External id": 989005,"Record function id": 0, "Sequence number": 10552569, "Fwd thread id": 1, "Ev Idx": 6988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492603.359, "dur": 4.695, + "args": { + "External id": 989006,"Sequence number": 10552569, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6989 + } + }, + { + "ph": "f", "id": 350, "pid": 2338706, "tid": 2379422, "ts": 6345940492603.359, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940492605.159, "dur": 2.740, + "args": { + "External id": 989007,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940492606.416, "dur": 1.384, + "args": { + "External id": 989008,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492613.086, "dur": 10.427, + "args": { + "External id": 989009,"Record function id": 0, "Sequence number": 10552568, "Fwd thread id": 1, "Ev Idx": 6992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492614.020, "dur": 6.756, + "args": { + "External id": 989010,"Sequence number": 10552568, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6993 + } + }, + { + "ph": "f", "id": 351, "pid": 2338706, "tid": 2379422, "ts": 6345940492614.020, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940492615.037, "dur": 5.503, + "args": { + "External id": 989011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940492615.458, "dur": 4.601, + "args": { + "External id": 989012,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940492619.379, "dur": 0.551, + "args": { + "External id": 989013,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940492629.239, "dur": 9.481, + "args": { + "External id": 989014,"Record function id": 0, "Ev Idx": 6997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940492630.981, "dur": 7.036, + "args": { + "External id": 989015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940492633.548, "dur": 4.109, + "args": { + "External id": 989016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940492634.852, "dur": 2.711, + "args": { + "External id": 989017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492642.276, "dur": 6.502, + "args": { + "External id": 989018,"Record function id": 0, "Sequence number": 10552567, "Fwd thread id": 1, "Ev Idx": 7001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492643.507, "dur": 2.982, + "args": { + "External id": 989019,"Sequence number": 10552567, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 7002 + } + }, + { + "ph": "f", "id": 352, "pid": 2338706, "tid": 2379422, "ts": 6345940492643.507, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940492644.692, "dur": 1.635, + "args": { + "External id": 989020,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940492645.601, "dur": 0.627, + "args": { + "External id": 989021,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492652.403, "dur": 106.978, + "args": { + "External id": 989022,"Record function id": 0, "Sequence number": 10552566, "Fwd thread id": 1, "Ev Idx": 7005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492655.429, "dur": 97.119, + "args": { + "External id": 989023,"Sequence number": 10552566, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7006 + } + }, + { + "ph": "f", "id": 353, "pid": 2338706, "tid": 2379422, "ts": 6345940492655.429, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940492659.316, "dur": 2.947, + "args": { + "External id": 989024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940492660.134, "dur": 1.688, + "args": { + "External id": 989025,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 7008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940492661.267, "dur": 0.442, + "args": { + "External id": 989026,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 7009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940492663.093, "dur": 27.702, + "args": { + "External id": 989027,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 7010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940492692.065, "dur": 5.454, + "args": { + "External id": 989028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940492692.662, "dur": 4.283, + "args": { + "External id": 989029,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 7012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940492695.946, "dur": 0.870, + "args": { + "External id": 989030,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 7013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940492698.519, "dur": 4.520, + "args": { + "External id": 989031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940492700.351, "dur": 2.220, + "args": { + "External id": 989032,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940492702.068, "dur": 0.435, + "args": { + "External id": 989033,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940492703.745, "dur": 48.240, + "args": { + "External id": 989034,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 7017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492763.770, "dur": 32.412, + "args": { + "External id": 989035,"Record function id": 0, "Sequence number": 10552565, "Fwd thread id": 1, "Ev Idx": 7018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492765.115, "dur": 4.222, + "args": { + "External id": 989036,"Sequence number": 10552565, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7019 + } + }, + { + "ph": "f", "id": 354, "pid": 2338706, "tid": 2379422, "ts": 6345940492765.115, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940492766.828, "dur": 2.368, + "args": { + "External id": 989037,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940492767.856, "dur": 1.219, + "args": { + "External id": 989038,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345940492772.421, "dur": 21.458, + "args": { + "External id": 989039,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492802.348, "dur": 7.569, + "args": { + "External id": 989040,"Record function id": 0, "Sequence number": 10552564, "Fwd thread id": 1, "Ev Idx": 7023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492803.216, "dur": 4.816, + "args": { + "External id": 989041,"Sequence number": 10552564, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7024 + } + }, + { + "ph": "f", "id": 355, "pid": 2338706, "tid": 2379422, "ts": 6345940492803.216, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940492804.441, "dur": 3.371, + "args": { + "External id": 989042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940492805.352, "dur": 1.966, + "args": { + "External id": 989043,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940492806.720, "dur": 0.458, + "args": { + "External id": 989044,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940492814.177, "dur": 7.239, + "args": { + "External id": 989045,"Record function id": 0, "Ev Idx": 7028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940492815.708, "dur": 5.207, + "args": { + "External id": 989046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940492816.636, "dur": 3.848, + "args": { + "External id": 989047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940492817.027, "dur": 3.368, + "args": { + "External id": 989048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492824.805, "dur": 9.051, + "args": { + "External id": 989049,"Record function id": 0, "Sequence number": 10552563, "Fwd thread id": 1, "Ev Idx": 7032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492826.209, "dur": 4.895, + "args": { + "External id": 989050,"Sequence number": 10552563, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7033 + } + }, + { + "ph": "f", "id": 356, "pid": 2338706, "tid": 2379422, "ts": 6345940492826.209, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940492827.585, "dur": 3.363, + "args": { + "External id": 989051,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940492829.933, "dur": 0.922, + "args": { + "External id": 989052,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492837.305, "dur": 129.207, + "args": { + "External id": 989053,"Record function id": 0, "Sequence number": 10552562, "Fwd thread id": 1, "Ev Idx": 7036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492838.175, "dur": 118.977, + "args": { + "External id": 989054,"Sequence number": 10552562, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7037 + } + }, + { + "ph": "f", "id": 357, "pid": 2338706, "tid": 2379422, "ts": 6345940492838.175, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940492840.435, "dur": 2.615, + "args": { + "External id": 989055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940492840.934, "dur": 1.652, + "args": { + "External id": 989056,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 7039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940492841.928, "dur": 0.553, + "args": { + "External id": 989057,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940492843.879, "dur": 50.223, + "args": { + "External id": 989058,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 7041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940492897.340, "dur": 3.210, + "args": { + "External id": 989059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940492897.872, "dur": 2.158, + "args": { + "External id": 989060,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940492899.250, "dur": 0.653, + "args": { + "External id": 989061,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940492901.845, "dur": 4.567, + "args": { + "External id": 989062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940492903.131, "dur": 2.655, + "args": { + "External id": 989063,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940492905.094, "dur": 0.623, + "args": { + "External id": 989064,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940492909.085, "dur": 47.361, + "args": { + "External id": 989065,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 7048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492971.378, "dur": 24.318, + "args": { + "External id": 989066,"Record function id": 0, "Sequence number": 10552561, "Fwd thread id": 1, "Ev Idx": 7049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492972.602, "dur": 3.941, + "args": { + "External id": 989067,"Sequence number": 10552561, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7050 + } + }, + { + "ph": "f", "id": 358, "pid": 2338706, "tid": 2379422, "ts": 6345940492972.602, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940492974.355, "dur": 2.044, + "args": { + "External id": 989068,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940492975.276, "dur": 1.000, + "args": { + "External id": 989069,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940492979.037, "dur": 14.567, + "args": { + "External id": 989070,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940492999.434, "dur": 27.703, + "args": { + "External id": 989071,"Record function id": 0, "Sequence number": 10552560, "Fwd thread id": 1, "Ev Idx": 7054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940493000.528, "dur": 6.539, + "args": { + "External id": 989072,"Sequence number": 10552560, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7055 + } + }, + { + "ph": "f", "id": 359, "pid": 2338706, "tid": 2379422, "ts": 6345940493000.528, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940493001.177, "dur": 5.704, + "args": { + "External id": 989073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940493001.920, "dur": 4.467, + "args": { + "External id": 989074,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940493003.623, "dur": 2.679, + "args": { + "External id": 989075,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940493033.926, "dur": 7.624, + "args": { + "External id": 989076,"Record function id": 0, "Ev Idx": 7059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940493035.758, "dur": 5.308, + "args": { + "External id": 989077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940493037.668, "dur": 2.649, + "args": { + "External id": 989078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940493038.755, "dur": 1.459, + "args": { + "External id": 989079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940493046.888, "dur": 393.241, + "args": { + "External id": 989080,"Record function id": 0, "Sequence number": 10552559, "Fwd thread id": 1, "Ev Idx": 7063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940493048.567, "dur": 361.664, + "args": { + "External id": 989081,"Sequence number": 10552559, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7064 + } + }, + { + "ph": "f", "id": 360, "pid": 2338706, "tid": 2379422, "ts": 6345940493048.567, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940493122.418, "dur": 2.924, + "args": { + "External id": 989082,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940493123.256, "dur": 1.828, + "args": { + "External id": 989083,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940493139.926, "dur": 5.210, + "args": { + "External id": 989084,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940493154.522, "dur": 2.244, + "args": { + "External id": 989085,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940493304.000, "dur": 2.082, + "args": { + "External id": 989086,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 7069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940493309.936, "dur": 36.824, + "args": { + "External id": 989087,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 7070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940493322.301, "dur": 0.992, + "args": { + "External id": 989088,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 7071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940493353.071, "dur": 30.351, + "args": { + "External id": 989089,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 7072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940493357.309, "dur": 25.866, + "args": { + "External id": 989090,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 7073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940493361.590, "dur": 4.159, + "args": { + "External id": 989091,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940493367.491, "dur": 15.132, + "args": { + "External id": 989092,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 7075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345940493388.098, "dur": 4.753, + "args": { + "External id": 989093,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 7076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940493389.797, "dur": 2.914, + "args": { + "External id": 989094,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 7077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940493398.555, "dur": 4.471, + "args": { + "External id": 989095,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940493402.052, "dur": 0.885, + "args": { + "External id": 989096,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940493422.131, "dur": 14.381, + "args": { + "External id": 989097,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940493452.178, "dur": 8.809, + "args": { + "External id": 989098,"Record function id": 0, "Ev Idx": 7081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940493454.441, "dur": 5.803, + "args": { + "External id": 989099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940493456.476, "dur": 2.895, + "args": { + "External id": 989100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940493457.320, "dur": 1.930, + "args": { + "External id": 989101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940493464.988, "dur": 5.894, + "args": { + "External id": 989102,"Record function id": 0, "Sequence number": 10552558, "Fwd thread id": 1, "Ev Idx": 7085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940493466.788, "dur": 1.352, + "args": { + "External id": 989103,"Sequence number": 10552558, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7086 + } + }, + { + "ph": "f", "id": 361, "pid": 2338706, "tid": 2379422, "ts": 6345940493466.788, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940493475.239, "dur": 418.913, + "args": { + "External id": 989104,"Record function id": 0, "Sequence number": 10552557, "Fwd thread id": 1, "Ev Idx": 7087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940493476.443, "dur": 405.003, + "args": { + "External id": 989105,"Sequence number": 10552557, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7088 + } + }, + { + "ph": "f", "id": 362, "pid": 2338706, "tid": 2379422, "ts": 6345940493476.443, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940493508.191, "dur": 8.615, + "args": { + "External id": 989106,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940493513.157, "dur": 3.374, + "args": { + "External id": 989107,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 7090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940493519.857, "dur": 5.371, + "args": { + "External id": 989108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940493521.854, "dur": 2.685, + "args": { + "External id": 989109,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940493523.696, "dur": 0.659, + "args": { + "External id": 989110,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2379422, + "ts": 6345940493528.681, "dur": 105.358, + "args": { + "External id": 989111,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 7094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940493531.541, "dur": 3.583, + "args": { + "External id": 989112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 7095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940493532.268, "dur": 2.404, + "args": { + "External id": 989113,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 7096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940493533.746, "dur": 0.818, + "args": { + "External id": 989114,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 7097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2379422, + "ts": 6345940493536.157, "dur": 97.464, + "args": { + "External id": 989115,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 7098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940493537.722, "dur": 95.132, + "args": { + "External id": 989116,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 7099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345940493638.448, "dur": 2.744, + "args": { + "External id": 989117,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 7100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940493639.953, "dur": 1.097, + "args": { + "External id": 989118,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 7101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940493674.317, "dur": 3.001, + "args": { + "External id": 989119,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940493678.935, "dur": 4.683, + "args": { + "External id": 989120,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940493684.970, "dur": 2.348, + "args": { + "External id": 989121,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940493717.651, "dur": 2.543, + "args": { + "External id": 989122,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940493718.961, "dur": 0.971, + "args": { + "External id": 989123,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338706, "tid": 2379422, + "ts": 6345940493742.959, "dur": 119.534, + "args": { + "External id": 989124,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 7107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2379422, + "ts": 6345940493750.760, "dur": 5.345, + "args": { + "External id": 989125,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940493754.439, "dur": 0.841, + "args": { + "External id": 989126,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940493757.755, "dur": 5.164, + "args": { + "External id": 989127,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 7110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940493761.288, "dur": 0.783, + "args": { + "External id": 989128,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 7111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2379422, + "ts": 6345940493764.100, "dur": 2.927, + "args": { + "External id": 989129,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 7112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940493765.997, "dur": 0.649, + "args": { + "External id": 989130,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 7113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940493767.953, "dur": 2.782, + "args": { + "External id": 989131,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 7114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940493769.703, "dur": 0.596, + "args": { + "External id": 989132,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 7115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940493774.664, "dur": 5.081, + "args": { + "External id": 989133,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 7116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940493778.997, "dur": 0.432, + "args": { + "External id": 989134,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 7117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940493781.002, "dur": 7.885, + "args": { + "External id": 989135,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 7118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940493785.134, "dur": 3.571, + "args": { + "External id": 989136,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 7119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940493790.059, "dur": 2.744, + "args": { + "External id": 989137,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 7120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940493791.907, "dur": 0.615, + "args": { + "External id": 989138,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 7121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940493795.686, "dur": 2.540, + "args": { + "External id": 989139,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 7122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940493796.714, "dur": 1.413, + "args": { + "External id": 989140,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 7123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345940493799.258, "dur": 52.588, + "args": { + "External id": 989141,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 7124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940493853.461, "dur": 1.318, + "args": { + "External id": 989142,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 7125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338706, "tid": 2379422, + "ts": 6345940493855.737, "dur": 2.909, + "args": { + "External id": 989143,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 7126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940493857.656, "dur": 0.459, + "args": { + "External id": 989144,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 7127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940493860.671, "dur": 0.855, + "args": { + "External id": 989145,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 7128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940493903.739, "dur": 8.587, + "args": { + "External id": 989146,"Record function id": 0, "Ev Idx": 7129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940493905.632, "dur": 6.014, + "args": { + "External id": 989147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940493907.530, "dur": 3.003, + "args": { + "External id": 989148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940493908.587, "dur": 1.846, + "args": { + "External id": 989149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940493916.654, "dur": 7.551, + "args": { + "External id": 989150,"Record function id": 0, "Sequence number": 10552556, "Fwd thread id": 1, "Ev Idx": 7133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940493917.990, "dur": 3.727, + "args": { + "External id": 989151,"Sequence number": 10552556, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 7134 + } + }, + { + "ph": "f", "id": 363, "pid": 2338706, "tid": 2379422, "ts": 6345940493917.990, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940493919.689, "dur": 1.851, + "args": { + "External id": 989152,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940493920.484, "dur": 0.950, + "args": { + "External id": 989153,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940493927.984, "dur": 172.131, + "args": { + "External id": 989154,"Record function id": 0, "Sequence number": 10552555, "Fwd thread id": 1, "Ev Idx": 7137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940493929.154, "dur": 161.379, + "args": { + "External id": 989155,"Sequence number": 10552555, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7138 + } + }, + { + "ph": "f", "id": 364, "pid": 2338706, "tid": 2379422, "ts": 6345940493929.154, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940493932.792, "dur": 7.670, + "args": { + "External id": 989156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940493933.933, "dur": 5.931, + "args": { + "External id": 989157,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 7140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940493937.211, "dur": 2.525, + "args": { + "External id": 989158,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 7141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940493941.331, "dur": 40.396, + "args": { + "External id": 989159,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 7142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940493982.906, "dur": 4.335, + "args": { + "External id": 989160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940493983.660, "dur": 2.923, + "args": { + "External id": 989161,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 7144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940493985.644, "dur": 0.773, + "args": { + "External id": 989162,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 7145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940493989.318, "dur": 6.286, + "args": { + "External id": 989163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940493990.764, "dur": 4.298, + "args": { + "External id": 989164,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940493994.237, "dur": 0.755, + "args": { + "External id": 989165,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940493996.352, "dur": 92.445, + "args": { + "External id": 989166,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 7149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940494108.399, "dur": 8.053, + "args": { + "External id": 989167,"Record function id": 0, "Sequence number": 10552554, "Fwd thread id": 1, "Ev Idx": 7150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940494109.757, "dur": 5.392, + "args": { + "External id": 989168,"Sequence number": 10552554, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7151 + } + }, + { + "ph": "f", "id": 365, "pid": 2338706, "tid": 2379422, "ts": 6345940494109.757, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940494111.835, "dur": 3.173, + "args": { + "External id": 989169,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940494112.960, "dur": 1.845, + "args": { + "External id": 989170,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940494119.975, "dur": 11.302, + "args": { + "External id": 989171,"Record function id": 0, "Sequence number": 10552553, "Fwd thread id": 1, "Ev Idx": 7154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940494121.035, "dur": 7.477, + "args": { + "External id": 989172,"Sequence number": 10552553, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7155 + } + }, + { + "ph": "f", "id": 366, "pid": 2338706, "tid": 2379422, "ts": 6345940494121.035, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940494122.120, "dur": 6.189, + "args": { + "External id": 989173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940494123.371, "dur": 4.422, + "args": { + "External id": 989174,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940494127.133, "dur": 0.521, + "args": { + "External id": 989175,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940494135.641, "dur": 9.254, + "args": { + "External id": 989176,"Record function id": 0, "Ev Idx": 7159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940494137.182, "dur": 7.160, + "args": { + "External id": 989177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940494138.782, "dur": 5.266, + "args": { + "External id": 989178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940494139.803, "dur": 4.161, + "args": { + "External id": 989179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940494148.496, "dur": 6.772, + "args": { + "External id": 989180,"Record function id": 0, "Sequence number": 10552552, "Fwd thread id": 1, "Ev Idx": 7163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940494149.704, "dur": 3.329, + "args": { + "External id": 989181,"Sequence number": 10552552, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 7164 + } + }, + { + "ph": "f", "id": 367, "pid": 2338706, "tid": 2379422, "ts": 6345940494149.704, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940494150.931, "dur": 1.952, + "args": { + "External id": 989182,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940494151.899, "dur": 0.865, + "args": { + "External id": 989183,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940494158.973, "dur": 213.125, + "args": { + "External id": 989184,"Record function id": 0, "Sequence number": 10552551, "Fwd thread id": 1, "Ev Idx": 7167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940494262.107, "dur": 100.407, + "args": { + "External id": 989185,"Sequence number": 10552551, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7168 + } + }, + { + "ph": "f", "id": 368, "pid": 2338706, "tid": 2379422, "ts": 6345940494262.107, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940494266.005, "dur": 3.859, + "args": { + "External id": 989186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940494266.550, "dur": 2.712, + "args": { + "External id": 989187,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 7170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940494268.475, "dur": 0.612, + "args": { + "External id": 989188,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 7171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940494270.722, "dur": 44.615, + "args": { + "External id": 989189,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 7172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940494316.601, "dur": 7.419, + "args": { + "External id": 989190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940494317.547, "dur": 5.841, + "args": { + "External id": 989191,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 7174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940494320.534, "dur": 2.714, + "args": { + "External id": 989192,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 7175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940494325.574, "dur": 3.594, + "args": { + "External id": 989193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940494327.153, "dur": 1.586, + "args": { + "External id": 989194,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940494328.313, "dur": 0.358, + "args": { + "External id": 989195,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940494329.828, "dur": 32.000, + "args": { + "External id": 989196,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 7179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940494377.078, "dur": 37.795, + "args": { + "External id": 989197,"Record function id": 0, "Sequence number": 10552550, "Fwd thread id": 1, "Ev Idx": 7180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940494377.996, "dur": 4.366, + "args": { + "External id": 989198,"Sequence number": 10552550, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7181 + } + }, + { + "ph": "f", "id": 369, "pid": 2338706, "tid": 2379422, "ts": 6345940494377.996, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940494379.976, "dur": 2.236, + "args": { + "External id": 989199,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940494380.853, "dur": 1.197, + "args": { + "External id": 989200,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345940494385.043, "dur": 26.465, + "args": { + "External id": 989201,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940494418.934, "dur": 9.386, + "args": { + "External id": 989202,"Record function id": 0, "Sequence number": 10552549, "Fwd thread id": 1, "Ev Idx": 7185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940494422.160, "dur": 3.975, + "args": { + "External id": 989203,"Sequence number": 10552549, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7186 + } + }, + { + "ph": "f", "id": 370, "pid": 2338706, "tid": 2379422, "ts": 6345940494422.160, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940494423.127, "dur": 2.800, + "args": { + "External id": 989204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940494423.770, "dur": 1.645, + "args": { + "External id": 989205,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940494424.852, "dur": 0.443, + "args": { + "External id": 989206,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940494432.975, "dur": 4.622, + "args": { + "External id": 989207,"Record function id": 0, "Ev Idx": 7190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940494434.335, "dur": 2.746, + "args": { + "External id": 989208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940494435.161, "dur": 1.491, + "args": { + "External id": 989209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940494435.605, "dur": 0.956, + "args": { + "External id": 989210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940494441.840, "dur": 444.609, + "args": { + "External id": 989211,"Record function id": 0, "Sequence number": 10552548, "Fwd thread id": 1, "Ev Idx": 7194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940494450.608, "dur": 397.821, + "args": { + "External id": 989212,"Sequence number": 10552548, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 7195 + } + }, + { + "ph": "f", "id": 371, "pid": 2338706, "tid": 2379422, "ts": 6345940494450.608, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2379422, + "ts": 6345940494479.384, "dur": 36.684, + "args": { + "External id": 989213,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345940494481.091, "dur": 34.784, + "args": { + "External id": 989214,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940494483.709, "dur": 7.090, + "args": { + "External id": 989215,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 7198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940494486.831, "dur": 3.337, + "args": { + "External id": 989216,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940494492.179, "dur": 23.127, + "args": { + "External id": 989217,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940494531.062, "dur": 1.730, + "args": { + "External id": 989218,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940494531.566, "dur": 1.062, + "args": { + "External id": 989219,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940494536.961, "dur": 1.526, + "args": { + "External id": 989220,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940494537.608, "dur": 0.787, + "args": { + "External id": 989221,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940494554.011, "dur": 2.657, + "args": { + "External id": 989222,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940494567.974, "dur": 2.513, + "args": { + "External id": 989223,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940494737.496, "dur": 4.572, + "args": { + "External id": 989224,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 7207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940494747.113, "dur": 32.265, + "args": { + "External id": 989225,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 7208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940494756.252, "dur": 0.744, + "args": { + "External id": 989226,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 7209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940494785.536, "dur": 30.838, + "args": { + "External id": 989227,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 7210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940494787.743, "dur": 28.340, + "args": { + "External id": 989228,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 7211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940494792.452, "dur": 4.124, + "args": { + "External id": 989229,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940494800.112, "dur": 15.470, + "args": { + "External id": 989230,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 7213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345940494820.663, "dur": 1.999, + "args": { + "External id": 989231,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 7214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940494821.669, "dur": 0.891, + "args": { + "External id": 989232,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 7215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940494830.291, "dur": 2.540, + "args": { + "External id": 989233,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940494831.554, "dur": 1.117, + "args": { + "External id": 989234,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940494835.636, "dur": 1.673, + "args": { + "External id": 989235,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940494836.273, "dur": 0.955, + "args": { + "External id": 989236,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940494866.156, "dur": 18.752, + "args": { + "External id": 989237,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940494896.666, "dur": 8.311, + "args": { + "External id": 989238,"Record function id": 0, "Ev Idx": 7221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940494899.158, "dur": 5.137, + "args": { + "External id": 989239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940494901.171, "dur": 2.284, + "args": { + "External id": 989240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940494902.002, "dur": 1.349, + "args": { + "External id": 989241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940494908.631, "dur": 7.451, + "args": { + "External id": 989242,"Record function id": 0, "Sequence number": 10552547, "Fwd thread id": 1, "Ev Idx": 7225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940494909.709, "dur": 3.696, + "args": { + "External id": 989243,"Sequence number": 10552547, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7226 + } + }, + { + "ph": "f", "id": 372, "pid": 2338706, "tid": 2379422, "ts": 6345940494909.709, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940494911.295, "dur": 1.929, + "args": { + "External id": 989244,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940494912.112, "dur": 0.998, + "args": { + "External id": 989245,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940494919.640, "dur": 194.180, + "args": { + "External id": 989246,"Record function id": 0, "Sequence number": 10552546, "Fwd thread id": 1, "Ev Idx": 7229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940494920.807, "dur": 183.738, + "args": { + "External id": 989247,"Sequence number": 10552546, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7230 + } + }, + { + "ph": "f", "id": 373, "pid": 2338706, "tid": 2379422, "ts": 6345940494920.807, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940494925.051, "dur": 6.781, + "args": { + "External id": 989248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940494928.633, "dur": 2.598, + "args": { + "External id": 989249,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 7232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940494930.348, "dur": 0.705, + "args": { + "External id": 989250,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940494933.001, "dur": 57.505, + "args": { + "External id": 989251,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 7234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940494991.545, "dur": 3.720, + "args": { + "External id": 989252,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940494992.335, "dur": 2.304, + "args": { + "External id": 989253,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940494993.770, "dur": 0.735, + "args": { + "External id": 989254,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940494996.904, "dur": 8.259, + "args": { + "External id": 989255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940495000.916, "dur": 3.724, + "args": { + "External id": 989256,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940495001.873, "dur": 2.700, + "args": { + "External id": 989257,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940495005.911, "dur": 97.254, + "args": { + "External id": 989258,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 7241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940495122.188, "dur": 10.289, + "args": { + "External id": 989259,"Record function id": 0, "Sequence number": 10552545, "Fwd thread id": 1, "Ev Idx": 7242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940495123.471, "dur": 6.572, + "args": { + "External id": 989260,"Sequence number": 10552545, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7243 + } + }, + { + "ph": "f", "id": 374, "pid": 2338706, "tid": 2379422, "ts": 6345940495123.471, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940495125.942, "dur": 3.929, + "args": { + "External id": 989261,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940495126.749, "dur": 2.967, + "args": { + "External id": 989262,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940495136.035, "dur": 10.454, + "args": { + "External id": 989263,"Record function id": 0, "Sequence number": 10552544, "Fwd thread id": 1, "Ev Idx": 7246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940495137.098, "dur": 6.975, + "args": { + "External id": 989264,"Sequence number": 10552544, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7247 + } + }, + { + "ph": "f", "id": 375, "pid": 2338706, "tid": 2379422, "ts": 6345940495137.098, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940495138.012, "dur": 5.852, + "args": { + "External id": 989265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940495138.891, "dur": 4.417, + "args": { + "External id": 989266,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940495142.598, "dur": 0.565, + "args": { + "External id": 989267,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940495150.990, "dur": 6.507, + "args": { + "External id": 989268,"Record function id": 0, "Ev Idx": 7251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940495152.434, "dur": 4.442, + "args": { + "External id": 989269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940495154.084, "dur": 2.455, + "args": { + "External id": 989270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940495154.709, "dur": 1.728, + "args": { + "External id": 989271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940495160.786, "dur": 6.051, + "args": { + "External id": 989272,"Record function id": 0, "Sequence number": 10552543, "Fwd thread id": 1, "Ev Idx": 7255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940495161.589, "dur": 3.475, + "args": { + "External id": 989273,"Sequence number": 10552543, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7256 + } + }, + { + "ph": "f", "id": 376, "pid": 2338706, "tid": 2379422, "ts": 6345940495161.589, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940495162.864, "dur": 2.052, + "args": { + "External id": 989274,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940495163.704, "dur": 1.116, + "args": { + "External id": 989275,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940495171.953, "dur": 365.281, + "args": { + "External id": 989276,"Record function id": 0, "Sequence number": 10552542, "Fwd thread id": 1, "Ev Idx": 7259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940495173.105, "dur": 340.695, + "args": { + "External id": 989277,"Sequence number": 10552542, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7260 + } + }, + { + "ph": "f", "id": 377, "pid": 2338706, "tid": 2379422, "ts": 6345940495173.105, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940495192.689, "dur": 7.776, + "args": { + "External id": 989278,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 7261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940495195.722, "dur": 4.114, + "args": { + "External id": 989279,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940495202.718, "dur": 3.808, + "args": { + "External id": 989280,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 7263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940495204.300, "dur": 2.010, + "args": { + "External id": 989281,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940495208.016, "dur": 6.296, + "args": { + "External id": 989282,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 7265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940495210.140, "dur": 3.944, + "args": { + "External id": 989283,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940495240.844, "dur": 246.636, + "args": { + "External id": 989284,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940495326.484, "dur": 3.961, + "args": { + "External id": 989285,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940495332.369, "dur": 3.252, + "args": { + "External id": 989286,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940495336.716, "dur": 1.968, + "args": { + "External id": 989287,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940495339.564, "dur": 2.367, + "args": { + "External id": 989288,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940495387.882, "dur": 3.052, + "args": { + "External id": 989289,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940495389.204, "dur": 1.536, + "args": { + "External id": 989290,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940495392.842, "dur": 28.294, + "args": { + "External id": 989291,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940495398.290, "dur": 2.588, + "args": { + "External id": 989292,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940495422.709, "dur": 3.704, + "args": { + "External id": 989293,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940495425.882, "dur": 0.443, + "args": { + "External id": 989294,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940495427.344, "dur": 14.085, + "args": { + "External id": 989295,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940495429.769, "dur": 0.488, + "args": { + "External id": 989296,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940495500.900, "dur": 3.465, + "args": { + "External id": 989297,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940495507.264, "dur": 0.905, + "args": { + "External id": 989298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2379422, + "ts": 6345940495510.262, "dur": 0.638, + "args": { + "External id": 989299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940495545.176, "dur": 230.317, + "args": { + "External id": 989300,"Record function id": 0, "Sequence number": 10552541, "Fwd thread id": 1, "Ev Idx": 7283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940495546.691, "dur": 221.808, + "args": { + "External id": 989301,"Sequence number": 10552541, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7284 + } + }, + { + "ph": "f", "id": 378, "pid": 2338706, "tid": 2379422, "ts": 6345940495546.691, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345940495567.330, "dur": 46.621, + "args": { + "External id": 989302,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940495571.075, "dur": 4.922, + "args": { + "External id": 989303,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940495577.472, "dur": 35.895, + "args": { + "External id": 989304,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 7287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940495624.493, "dur": 5.341, + "args": { + "External id": 989305,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 7288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940495626.932, "dur": 2.627, + "args": { + "External id": 989306,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940495782.461, "dur": 163.613, + "args": { + "External id": 989307,"Record function id": 0, "Sequence number": 10552540, "Fwd thread id": 1, "Ev Idx": 7290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940495784.576, "dur": 155.170, + "args": { + "External id": 989308,"Sequence number": 10552540, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7291 + } + }, + { + "ph": "f", "id": 379, "pid": 2338706, "tid": 2379422, "ts": 6345940495784.576, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2379422, + "ts": 6345940495796.377, "dur": 43.090, + "args": { + "External id": 989309,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940495799.059, "dur": 2.677, + "args": { + "External id": 989310,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940495802.819, "dur": 36.148, + "args": { + "External id": 989311,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 7294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2379422, + "ts": 6345940495846.999, "dur": 4.863, + "args": { + "External id": 989312,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 7295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940495848.867, "dur": 2.698, + "args": { + "External id": 989313,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940495952.190, "dur": 14.212, + "args": { + "External id": 989314,"Record function id": 0, "Sequence number": 10552539, "Fwd thread id": 1, "Ev Idx": 7297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940495955.858, "dur": 7.692, + "args": { + "External id": 989315,"Sequence number": 10552539, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7298 + } + }, + { + "ph": "f", "id": 380, "pid": 2338706, "tid": 2379422, "ts": 6345940495955.858, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940495958.315, "dur": 4.995, + "args": { + "External id": 989316,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940495959.722, "dur": 3.402, + "args": { + "External id": 989317,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940495969.964, "dur": 7.458, + "args": { + "External id": 989318,"Record function id": 0, "Sequence number": 10552538, "Fwd thread id": 1, "Ev Idx": 7301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940495970.830, "dur": 3.994, + "args": { + "External id": 989319,"Sequence number": 10552538, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7302 + } + }, + { + "ph": "f", "id": 381, "pid": 2338706, "tid": 2379422, "ts": 6345940495970.830, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940495972.583, "dur": 2.082, + "args": { + "External id": 989320,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940495973.604, "dur": 0.962, + "args": { + "External id": 989321,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940495980.623, "dur": 6.680, + "args": { + "External id": 989322,"Record function id": 0, "Sequence number": 10552537, "Fwd thread id": 1, "Ev Idx": 7305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940495981.671, "dur": 3.688, + "args": { + "External id": 989323,"Sequence number": 10552537, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7306 + } + }, + { + "ph": "f", "id": 382, "pid": 2338706, "tid": 2379422, "ts": 6345940495981.671, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940495983.317, "dur": 1.907, + "args": { + "External id": 989324,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940495984.236, "dur": 0.872, + "args": { + "External id": 989325,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940495990.900, "dur": 9.038, + "args": { + "External id": 989326,"Record function id": 0, "Sequence number": 10552536, "Fwd thread id": 1, "Ev Idx": 7309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940495992.440, "dur": 5.682, + "args": { + "External id": 989327,"Sequence number": 10552536, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 7310 + } + }, + { + "ph": "f", "id": 383, "pid": 2338706, "tid": 2379422, "ts": 6345940495992.440, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940495993.896, "dur": 4.077, + "args": { + "External id": 989328,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940495996.853, "dur": 1.019, + "args": { + "External id": 989329,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496003.626, "dur": 218.970, + "args": { + "External id": 989330,"Record function id": 0, "Sequence number": 10552535, "Fwd thread id": 1, "Ev Idx": 7313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496004.780, "dur": 206.344, + "args": { + "External id": 989331,"Sequence number": 10552535, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7314 + } + }, + { + "ph": "f", "id": 384, "pid": 2338706, "tid": 2379422, "ts": 6345940496004.780, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940496029.173, "dur": 9.745, + "args": { + "External id": 989332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940496031.182, "dur": 6.829, + "args": { + "External id": 989333,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 7316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940496033.737, "dur": 3.876, + "args": { + "External id": 989334,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 7317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940496042.928, "dur": 105.774, + "args": { + "External id": 989335,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 7318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940496151.805, "dur": 5.933, + "args": { + "External id": 989336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940496152.719, "dur": 3.966, + "args": { + "External id": 989337,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 7320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940496154.875, "dur": 1.661, + "args": { + "External id": 989338,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 7321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940496159.813, "dur": 6.244, + "args": { + "External id": 989339,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940496161.449, "dur": 3.976, + "args": { + "External id": 989340,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940496165.040, "dur": 0.298, + "args": { + "External id": 989341,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940496166.773, "dur": 43.363, + "args": { + "External id": 989342,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 7325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496230.919, "dur": 8.246, + "args": { + "External id": 989343,"Record function id": 0, "Sequence number": 10552534, "Fwd thread id": 1, "Ev Idx": 7326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496232.222, "dur": 5.355, + "args": { + "External id": 989344,"Sequence number": 10552534, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7327 + } + }, + { + "ph": "f", "id": 385, "pid": 2338706, "tid": 2379422, "ts": 6345940496232.222, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940496234.582, "dur": 2.847, + "args": { + "External id": 989345,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940496235.628, "dur": 1.701, + "args": { + "External id": 989346,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496242.532, "dur": 9.936, + "args": { + "External id": 989347,"Record function id": 0, "Sequence number": 10552533, "Fwd thread id": 1, "Ev Idx": 7330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496243.364, "dur": 7.329, + "args": { + "External id": 989348,"Sequence number": 10552533, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7331 + } + }, + { + "ph": "f", "id": 386, "pid": 2338706, "tid": 2379422, "ts": 6345940496243.364, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940496244.676, "dur": 5.779, + "args": { + "External id": 989349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940496247.220, "dur": 2.632, + "args": { + "External id": 989350,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940496249.266, "dur": 0.472, + "args": { + "External id": 989351,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940496258.389, "dur": 11.473, + "args": { + "External id": 989352,"Record function id": 0, "Ev Idx": 7335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940496259.809, "dur": 9.313, + "args": { + "External id": 989353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940496262.462, "dur": 6.267, + "args": { + "External id": 989354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940496263.747, "dur": 4.878, + "args": { + "External id": 989355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496273.232, "dur": 7.677, + "args": { + "External id": 989356,"Record function id": 0, "Sequence number": 10552532, "Fwd thread id": 1, "Ev Idx": 7339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496274.420, "dur": 3.456, + "args": { + "External id": 989357,"Sequence number": 10552532, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 7340 + } + }, + { + "ph": "f", "id": 387, "pid": 2338706, "tid": 2379422, "ts": 6345940496274.420, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940496275.651, "dur": 2.083, + "args": { + "External id": 989358,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940496276.787, "dur": 0.815, + "args": { + "External id": 989359,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496284.160, "dur": 89.208, + "args": { + "External id": 989360,"Record function id": 0, "Sequence number": 10552531, "Fwd thread id": 1, "Ev Idx": 7343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496287.233, "dur": 79.341, + "args": { + "External id": 989361,"Sequence number": 10552531, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7344 + } + }, + { + "ph": "f", "id": 388, "pid": 2338706, "tid": 2379422, "ts": 6345940496287.233, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940496290.367, "dur": 2.869, + "args": { + "External id": 989362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940496290.914, "dur": 1.827, + "args": { + "External id": 989363,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 7346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940496291.920, "dur": 0.710, + "args": { + "External id": 989364,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 7347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940496293.826, "dur": 26.426, + "args": { + "External id": 989365,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 7348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940496321.292, "dur": 6.419, + "args": { + "External id": 989366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940496321.978, "dur": 5.097, + "args": { + "External id": 989367,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 7350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940496325.380, "dur": 1.553, + "args": { + "External id": 989368,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 7351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940496329.085, "dur": 3.675, + "args": { + "External id": 989369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940496330.497, "dur": 1.770, + "args": { + "External id": 989370,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940496331.709, "dur": 0.490, + "args": { + "External id": 989371,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940496333.327, "dur": 32.428, + "args": { + "External id": 989372,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 7355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496377.878, "dur": 76.503, + "args": { + "External id": 989373,"Record function id": 0, "Sequence number": 10552530, "Fwd thread id": 1, "Ev Idx": 7356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496417.307, "dur": 6.707, + "args": { + "External id": 989374,"Sequence number": 10552530, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7357 + } + }, + { + "ph": "f", "id": 389, "pid": 2338706, "tid": 2379422, "ts": 6345940496417.307, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940496421.238, "dur": 2.633, + "args": { + "External id": 989375,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940496422.154, "dur": 1.590, + "args": { + "External id": 989376,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2379422, + "ts": 6345940496427.042, "dur": 24.757, + "args": { + "External id": 989377,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496458.564, "dur": 11.732, + "args": { + "External id": 989378,"Record function id": 0, "Sequence number": 10552529, "Fwd thread id": 1, "Ev Idx": 7361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496459.581, "dur": 8.012, + "args": { + "External id": 989379,"Sequence number": 10552529, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7362 + } + }, + { + "ph": "f", "id": 390, "pid": 2338706, "tid": 2379422, "ts": 6345940496459.581, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940496462.995, "dur": 4.398, + "args": { + "External id": 989380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940496464.469, "dur": 2.299, + "args": { + "External id": 989381,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940496466.066, "dur": 0.546, + "args": { + "External id": 989382,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940496474.502, "dur": 5.978, + "args": { + "External id": 989383,"Record function id": 0, "Ev Idx": 7366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940496475.941, "dur": 4.041, + "args": { + "External id": 989384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940496476.917, "dur": 2.511, + "args": { + "External id": 989385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940496477.929, "dur": 1.393, + "args": { + "External id": 989386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496484.151, "dur": 7.474, + "args": { + "External id": 989387,"Record function id": 0, "Sequence number": 10552528, "Fwd thread id": 1, "Ev Idx": 7370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496485.345, "dur": 3.483, + "args": { + "External id": 989388,"Sequence number": 10552528, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7371 + } + }, + { + "ph": "f", "id": 391, "pid": 2338706, "tid": 2379422, "ts": 6345940496485.345, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940496486.789, "dur": 1.896, + "args": { + "External id": 989389,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940496487.542, "dur": 1.064, + "args": { + "External id": 989390,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496494.900, "dur": 99.219, + "args": { + "External id": 989391,"Record function id": 0, "Sequence number": 10552527, "Fwd thread id": 1, "Ev Idx": 7374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496498.339, "dur": 87.175, + "args": { + "External id": 989392,"Sequence number": 10552527, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7375 + } + }, + { + "ph": "f", "id": 392, "pid": 2338706, "tid": 2379422, "ts": 6345940496498.339, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940496500.930, "dur": 3.135, + "args": { + "External id": 989393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940496501.669, "dur": 1.937, + "args": { + "External id": 989394,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 7377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940496502.878, "dur": 0.598, + "args": { + "External id": 989395,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940496504.698, "dur": 34.950, + "args": { + "External id": 989396,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 7379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940496540.624, "dur": 5.278, + "args": { + "External id": 989397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940496541.125, "dur": 4.183, + "args": { + "External id": 989398,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940496544.645, "dur": 0.540, + "args": { + "External id": 989399,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940496547.354, "dur": 3.771, + "args": { + "External id": 989400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940496548.328, "dur": 2.149, + "args": { + "External id": 989401,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940496549.861, "dur": 0.509, + "args": { + "External id": 989402,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940496551.578, "dur": 33.294, + "args": { + "External id": 989403,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 7386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496598.982, "dur": 29.459, + "args": { + "External id": 989404,"Record function id": 0, "Sequence number": 10552526, "Fwd thread id": 1, "Ev Idx": 7387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496600.097, "dur": 8.337, + "args": { + "External id": 989405,"Sequence number": 10552526, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7388 + } + }, + { + "ph": "f", "id": 393, "pid": 2338706, "tid": 2379422, "ts": 6345940496600.097, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940496604.147, "dur": 4.096, + "args": { + "External id": 989406,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940496605.281, "dur": 2.821, + "args": { + "External id": 989407,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940496610.868, "dur": 15.141, + "args": { + "External id": 989408,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496632.169, "dur": 9.030, + "args": { + "External id": 989409,"Record function id": 0, "Sequence number": 10552525, "Fwd thread id": 1, "Ev Idx": 7392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338706, "tid": 2379422, + "ts": 6345940496633.609, "dur": 4.673, + "args": { + "External id": 989410,"Sequence number": 10552525, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7393 + } + }, + { + "ph": "f", "id": 394, "pid": 2338706, "tid": 2379422, "ts": 6345940496633.609, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2379422, + "ts": 6345940496634.458, "dur": 3.644, + "args": { + "External id": 989411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2379422, + "ts": 6345940496635.193, "dur": 2.324, + "args": { + "External id": 989412,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940496637.012, "dur": 0.362, + "args": { + "External id": 989413,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940496645.009, "dur": 5.290, + "args": { + "External id": 989414,"Record function id": 0, "Ev Idx": 7397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940496646.528, "dur": 3.284, + "args": { + "External id": 989415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940496647.685, "dur": 1.653, + "args": { + "External id": 989416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940496648.265, "dur": 0.984, + "args": { + "External id": 989417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940496654.435, "dur": 451.020, + "args": { + "External id": 989418,"Record function id": 0, "Sequence number": 10552524, "Fwd thread id": 1, "Ev Idx": 7401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940496656.035, "dur": 370.095, + "args": { + "External id": 989419,"Sequence number": 10552524, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7402 + } + }, + { + "ph": "f", "id": 395, "pid": 2338706, "tid": 2379422, "ts": 6345940496656.035, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940496693.888, "dur": 1.941, + "args": { + "External id": 989420,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940496694.828, "dur": 0.871, + "args": { + "External id": 989421,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940496710.742, "dur": 4.233, + "args": { + "External id": 989422,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940496724.685, "dur": 2.194, + "args": { + "External id": 989423,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940496876.722, "dur": 1.792, + "args": { + "External id": 989424,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 7407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940496895.285, "dur": 40.065, + "args": { + "External id": 989425,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 7408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940496910.616, "dur": 0.951, + "args": { + "External id": 989426,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 7409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940496941.218, "dur": 40.923, + "args": { + "External id": 989427,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 7410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940496945.537, "dur": 36.365, + "args": { + "External id": 989428,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 7411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940496950.766, "dur": 4.884, + "args": { + "External id": 989429,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940496957.473, "dur": 23.715, + "args": { + "External id": 989430,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 7413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2379422, + "ts": 6345940496986.499, "dur": 2.617, + "args": { + "External id": 989431,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 7414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940496987.729, "dur": 1.273, + "args": { + "External id": 989432,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 7415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940496995.783, "dur": 5.118, + "args": { + "External id": 989433,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940496999.334, "dur": 1.381, + "args": { + "External id": 989434,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940497044.975, "dur": 53.981, + "args": { + "External id": 989435,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940497120.750, "dur": 10.636, + "args": { + "External id": 989436,"Record function id": 0, "Ev Idx": 7419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940497123.021, "dur": 7.505, + "args": { + "External id": 989437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940497125.996, "dur": 3.689, + "args": { + "External id": 989438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940497127.094, "dur": 2.469, + "args": { + "External id": 989439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940497135.999, "dur": 2818.532, + "args": { + "External id": 989440,"Record function id": 0, "Ev Idx": 7423 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.25)", "pid": 2338706, "tid": 2379422, + "ts": 6345940497170.169, "dur": 966.286, + "args": { + "External id": 989441,"Record function id": 0, "Ev Idx": 7424 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.24", "pid": 2338706, "tid": 2379422, + "ts": 6345940497196.817, "dur": 931.082, + "args": { + "External id": 989442,"Record function id": 0, "Ev Idx": 7425 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338706, "tid": 2379422, + "ts": 6345940497214.062, "dur": 896.984, + "args": { + "External id": 989443,"Record function id": 0, "Ev Idx": 7426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940497308.457, "dur": 6.729, + "args": { + "External id": 989444,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345940497333.259, "dur": 35.724, + "args": { + "External id": 989445,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497339.448, "dur": 1.220, + "args": { + "External id": 989446,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497342.643, "dur": 0.440, + "args": { + "External id": 989447,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497344.217, "dur": 0.622, + "args": { + "External id": 989448,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497348.686, "dur": 0.461, + "args": { + "External id": 989449,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497350.173, "dur": 2.871, + "args": { + "External id": 989450,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497355.170, "dur": 2.665, + "args": { + "External id": 989451,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497359.003, "dur": 0.324, + "args": { + "External id": 989452,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497361.078, "dur": 0.390, + "args": { + "External id": 989453,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497364.621, "dur": 0.256, + "args": { + "External id": 989454,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940497381.705, "dur": 44.903, + "args": { + "External id": 989455,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345940497467.712, "dur": 122.578, + "args": { + "External id": 989456,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940497478.860, "dur": 3.217, + "args": { + "External id": 989457,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345940497488.163, "dur": 10.119, + "args": { + "External id": 989458,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345940497492.325, "dur": 5.522, + "args": { + "External id": 989459,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497496.031, "dur": 0.564, + "args": { + "External id": 989460,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345940497505.120, "dur": 25.543, + "args": { + "External id": 989461,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497506.680, "dur": 0.561, + "args": { + "External id": 989462,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497508.825, "dur": 2.138, + "args": { + "External id": 989463,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497514.037, "dur": 0.268, + "args": { + "External id": 989464,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497515.437, "dur": 0.410, + "args": { + "External id": 989465,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497516.861, "dur": 0.520, + "args": { + "External id": 989466,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497520.125, "dur": 0.566, + "args": { + "External id": 989467,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497521.591, "dur": 0.225, + "args": { + "External id": 989468,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497522.864, "dur": 2.013, + "args": { + "External id": 989469,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940497526.077, "dur": 0.337, + "args": { + "External id": 989470,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940497544.889, "dur": 35.986, + "args": { + "External id": 989471,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345940497652.184, "dur": 301.443, + "args": { + "External id": 989472,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345940497684.538, "dur": 264.179, + "args": { + "External id": 989473,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7456, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345940497694.998, "dur": 247.514, + "args": { + "External id": 989474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345940497974.953, "dur": 2.386, + "args": { + "External id": 989475,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7458, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940498145.213, "dur": 1783.326, + "args": { + "External id": 989476,"Sequence number": 10552523, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7459 + } + }, + { + "ph": "f", "id": 396, "pid": 2338706, "tid": 2379422, "ts": 6345940498145.213, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940498271.879, "dur": 112.187, + "args": { + "External id": 989477,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345940498431.860, "dur": 39.661, + "args": { + "External id": 989478,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345940498493.126, "dur": 49.441, + "args": { + "External id": 989479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940498552.683, "dur": 31.518, + "args": { + "External id": 989480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940498590.603, "dur": 40.093, + "args": { + "External id": 989481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940498637.378, "dur": 27.900, + "args": { + "External id": 989482,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940498675.101, "dur": 29.598, + "args": { + "External id": 989483,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345940498730.943, "dur": 24.251, + "args": { + "External id": 989484,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345940498777.723, "dur": 31.392, + "args": { + "External id": 989485,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345940498832.540, "dur": 19.639, + "args": { + "External id": 989486,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345940498867.945, "dur": 16.461, + "args": { + "External id": 989487,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940498894.578, "dur": 34.689, + "args": { + "External id": 989488,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940498933.118, "dur": 31.391, + "args": { + "External id": 989489,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940498993.484, "dur": 316.109, + "args": { + "External id": 989490,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940499133.384, "dur": 8.951, + "args": { + "External id": 989491,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940499144.536, "dur": 2.386, + "args": { + "External id": 989492,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940499147.825, "dur": 2.172, + "args": { + "External id": 989493,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940499151.455, "dur": 6.268, + "args": { + "External id": 989494,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940499203.802, "dur": 7.481, + "args": { + "External id": 989495,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940499208.215, "dur": 2.912, + "args": { + "External id": 989496,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940499212.975, "dur": 31.857, + "args": { + "External id": 989497,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940499218.951, "dur": 1.691, + "args": { + "External id": 989498,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940499246.414, "dur": 2.122, + "args": { + "External id": 989499,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940499247.776, "dur": 0.675, + "args": { + "External id": 989500,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940499249.290, "dur": 15.113, + "args": { + "External id": 989501,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940499251.314, "dur": 0.478, + "args": { + "External id": 989502,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345940499351.295, "dur": 30.260, + "args": { + "External id": 989503,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345940499402.812, "dur": 17.927, + "args": { + "External id": 989504,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940499428.617, "dur": 50.746, + "args": { + "External id": 989505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940499486.561, "dur": 39.623, + "args": { + "External id": 989506,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940499535.613, "dur": 19.789, + "args": { + "External id": 989507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940499560.685, "dur": 30.565, + "args": { + "External id": 989508,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940499598.163, "dur": 37.952, + "args": { + "External id": 989509,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940499650.565, "dur": 39.641, + "args": { + "External id": 989510,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345940499715.876, "dur": 25.807, + "args": { + "External id": 989511,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345940499759.387, "dur": 30.402, + "args": { + "External id": 989512,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345940499806.947, "dur": 18.002, + "args": { + "External id": 989513,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345940499843.834, "dur": 15.731, + "args": { + "External id": 989514,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345940499876.577, "dur": 17.634, + "args": { + "External id": 989515,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 7498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940499977.067, "dur": 15.991, + "args": { + "External id": 989516,"Record function id": 0, "Ev Idx": 7499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940499981.909, "dur": 10.217, + "args": { + "External id": 989517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940499986.104, "dur": 5.134, + "args": { + "External id": 989518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940499987.612, "dur": 3.523, + "args": { + "External id": 989519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940499997.064, "dur": 4.991, + "args": { + "External id": 989520,"Record function id": 0, "Ev Idx": 7503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940499998.797, "dur": 2.744, + "args": { + "External id": 989521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940499999.408, "dur": 1.663, + "args": { + "External id": 989522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940499999.929, "dur": 1.046, + "args": { + "External id": 989523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940500005.410, "dur": 27.963, + "args": { + "External id": 989524,"Record function id": 0, "Ev Idx": 7507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940500007.031, "dur": 25.281, + "args": { + "External id": 989525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940500026.569, "dur": 4.893, + "args": { + "External id": 989526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940500027.726, "dur": 3.452, + "args": { + "External id": 989527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940500038.475, "dur": 4.652, + "args": { + "External id": 989528,"Record function id": 0, "Ev Idx": 7511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940500040.351, "dur": 2.354, + "args": { + "External id": 989529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940500040.895, "dur": 1.365, + "args": { + "External id": 989530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940500041.318, "dur": 0.880, + "args": { + "External id": 989531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940500046.349, "dur": 4.377, + "args": { + "External id": 989532,"Record function id": 0, "Ev Idx": 7515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940500047.935, "dur": 2.319, + "args": { + "External id": 989533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940500048.425, "dur": 1.119, + "args": { + "External id": 989534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940500048.849, "dur": 0.631, + "args": { + "External id": 989535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940500091.536, "dur": 7.167, + "args": { + "External id": 989536,"Record function id": 0, "Ev Idx": 7519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940500093.702, "dur": 4.277, + "args": { + "External id": 989537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940500094.639, "dur": 2.048, + "args": { + "External id": 989538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940500095.213, "dur": 1.275, + "args": { + "External id": 989539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940500102.086, "dur": 4.807, + "args": { + "External id": 989540,"Record function id": 0, "Ev Idx": 7523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940500103.675, "dur": 2.764, + "args": { + "External id": 989541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940500104.528, "dur": 1.330, + "args": { + "External id": 989542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940500105.164, "dur": 0.621, + "args": { + "External id": 989543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940500110.182, "dur": 6.740, + "args": { + "External id": 989544,"Record function id": 0, "Ev Idx": 7527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940500111.813, "dur": 4.676, + "args": { + "External id": 989545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940500112.275, "dur": 3.496, + "args": { + "External id": 989546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940500114.924, "dur": 0.782, + "args": { + "External id": 989547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940500120.061, "dur": 5.104, + "args": { + "External id": 989548,"Record function id": 0, "Ev Idx": 7531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940500122.113, "dur": 2.626, + "args": { + "External id": 989549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940500122.672, "dur": 1.295, + "args": { + "External id": 989550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940500123.170, "dur": 0.736, + "args": { + "External id": 989551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940500128.864, "dur": 282457.968, + "args": { + "External id": 989552,"Record function id": 0, "Sequence number": 10552522, "Fwd thread id": 1, "Ev Idx": 7535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940500130.252, "dur": 282447.215, + "args": { + "External id": 989553,"Sequence number": 10552522, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7536 + } + }, + { + "ph": "f", "id": 397, "pid": 2338706, "tid": 2379422, "ts": 6345940500130.252, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.25)", "pid": 2338706, "tid": 2379422, + "ts": 6345940500165.807, "dur": 42.679, + "args": { + "External id": 989554,"Record function id": 0, "Ev Idx": 7537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.25)", "pid": 2338706, "tid": 2379422, + "ts": 6345940500217.196, "dur": 87.684, + "args": { + "External id": 989555,"Record function id": 0, "Ev Idx": 7538 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.25)", "pid": 2338706, "tid": 2379422, + "ts": 6345940500310.431, "dur": 282258.310, + "args": { + "External id": 989556,"Record function id": 0, "Ev Idx": 7539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940500374.178, "dur": 8.197, + "args": { + "External id": 989557,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940500395.015, "dur": 6.604, + "args": { + "External id": 989558,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345940500417.044, "dur": 281144.991, + "args": { + "External id": 989559,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345940500434.306, "dur": 281112.954, + "args": { + "External id": 989560,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940500573.736, "dur": 4.707, + "args": { + "External id": 989561,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940500599.281, "dur": 280896.267, + "args": { + "External id": 989562,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 7545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940500602.074, "dur": 280892.089, + "args": { + "External id": 989563,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 7546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940500606.407, "dur": 10.097, + "args": { + "External id": 989564,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940500618.487, "dur": 280869.994, + "args": { + "External id": 989565,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 7548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940781683.837, "dur": 13.701, + "args": { + "External id": 989566,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 7549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940781687.592, "dur": 9.533, + "args": { + "External id": 989567,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345940781734.470, "dur": 402.893, + "args": { + "External id": 989568,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 7551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345940781769.814, "dur": 361.542, + "args": { + "External id": 989569,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7552, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345940781783.488, "dur": 341.078, + "args": { + "External id": 989570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 7553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345940782166.226, "dur": 2.619, + "args": { + "External id": 989571,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7554, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940782238.907, "dur": 6.962, + "args": { + "External id": 989572,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940782260.555, "dur": 42.624, + "args": { + "External id": 989573,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940782315.627, "dur": 1.643, + "args": { + "External id": 989574,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940782323.872, "dur": 12.816, + "args": { + "External id": 989575,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940782342.803, "dur": 0.969, + "args": { + "External id": 989576,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940782348.291, "dur": 14.253, + "args": { + "External id": 989577,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940782367.927, "dur": 0.983, + "args": { + "External id": 989578,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940782374.052, "dur": 11.615, + "args": { + "External id": 989579,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940782390.240, "dur": 3.356, + "args": { + "External id": 989580,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940782398.052, "dur": 13.307, + "args": { + "External id": 989581,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940782416.221, "dur": 1.746, + "args": { + "External id": 989582,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940782425.490, "dur": 12.106, + "args": { + "External id": 989583,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940782442.152, "dur": 0.840, + "args": { + "External id": 989584,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940782447.328, "dur": 12.806, + "args": { + "External id": 989585,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940782464.586, "dur": 0.764, + "args": { + "External id": 989586,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940782470.439, "dur": 11.764, + "args": { + "External id": 989587,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940782488.868, "dur": 0.833, + "args": { + "External id": 989588,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940782494.563, "dur": 12.343, + "args": { + "External id": 989589,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 7572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940782602.678, "dur": 3110.625, + "args": { + "External id": 989590,"Record function id": 0, "Ev Idx": 7573 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.24)", "pid": 2338706, "tid": 2379422, + "ts": 6345940782626.011, "dur": 1146.427, + "args": { + "External id": 989591,"Record function id": 0, "Ev Idx": 7574 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338706, "tid": 2379422, + "ts": 6345940782641.829, "dur": 359.507, + "args": { + "External id": 989592,"Record function id": 0, "Ev Idx": 7575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940782733.002, "dur": 4.310, + "args": { + "External id": 989593,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940782740.977, "dur": 0.818, + "args": { + "External id": 989594,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940782743.488, "dur": 0.749, + "args": { + "External id": 989595,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940782746.522, "dur": 2.560, + "args": { + "External id": 989596,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940782750.515, "dur": 0.876, + "args": { + "External id": 989597,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940782753.736, "dur": 0.798, + "args": { + "External id": 989598,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940782758.641, "dur": 0.899, + "args": { + "External id": 989599,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940782778.955, "dur": 1.302, + "args": { + "External id": 989600,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940782782.017, "dur": 1.105, + "args": { + "External id": 989601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940782784.665, "dur": 0.933, + "args": { + "External id": 989602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940782807.222, "dur": 162.280, + "args": { + "External id": 989603,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940782825.039, "dur": 139.626, + "args": { + "External id": 989604,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940782844.496, "dur": 15.618, + "args": { + "External id": 989605,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940782865.912, "dur": 69.784, + "args": { + "External id": 989606,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 7589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940782868.929, "dur": 66.443, + "args": { + "External id": 989607,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 7590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940782872.717, "dur": 7.905, + "args": { + "External id": 989608,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940782882.471, "dur": 52.178, + "args": { + "External id": 989609,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 7592 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.23", "pid": 2338706, "tid": 2379422, + "ts": 6345940783159.156, "dur": 604.907, + "args": { + "External id": 989610,"Record function id": 0, "Ev Idx": 7593 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338706, "tid": 2379422, + "ts": 6345940783178.252, "dur": 572.982, + "args": { + "External id": 989611,"Record function id": 0, "Ev Idx": 7594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940783246.816, "dur": 6.780, + "args": { + "External id": 989612,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345940783270.619, "dur": 31.563, + "args": { + "External id": 989613,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783276.270, "dur": 1.660, + "args": { + "External id": 989614,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783280.262, "dur": 1.897, + "args": { + "External id": 989615,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783283.568, "dur": 0.322, + "args": { + "External id": 989616,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783285.265, "dur": 0.336, + "args": { + "External id": 989617,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783288.183, "dur": 0.566, + "args": { + "External id": 989618,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783290.084, "dur": 0.317, + "args": { + "External id": 989619,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783291.677, "dur": 2.415, + "args": { + "External id": 989620,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783296.911, "dur": 0.286, + "args": { + "External id": 989621,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783298.111, "dur": 0.346, + "args": { + "External id": 989622,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940783313.978, "dur": 48.909, + "args": { + "External id": 989623,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345940783394.312, "dur": 114.200, + "args": { + "External id": 989624,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940783405.012, "dur": 3.256, + "args": { + "External id": 989625,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345940783413.792, "dur": 11.095, + "args": { + "External id": 989626,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345940783418.435, "dur": 5.970, + "args": { + "External id": 989627,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783422.325, "dur": 0.798, + "args": { + "External id": 989628,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345940783430.961, "dur": 25.201, + "args": { + "External id": 989629,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783432.958, "dur": 0.636, + "args": { + "External id": 989630,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783437.001, "dur": 0.466, + "args": { + "External id": 989631,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783438.641, "dur": 0.245, + "args": { + "External id": 989632,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783440.491, "dur": 2.396, + "args": { + "External id": 989633,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783443.842, "dur": 0.279, + "args": { + "External id": 989634,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783445.813, "dur": 0.304, + "args": { + "External id": 989635,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783448.848, "dur": 0.327, + "args": { + "External id": 989636,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783450.690, "dur": 0.472, + "args": { + "External id": 989637,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940783452.660, "dur": 0.272, + "args": { + "External id": 989638,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940783469.304, "dur": 31.431, + "args": { + "External id": 989639,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345940783555.677, "dur": 123.284, + "args": { + "External id": 989640,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345940783586.235, "dur": 89.244, + "args": { + "External id": 989641,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7624, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345940783596.063, "dur": 74.821, + "args": { + "External id": 989642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345940783697.825, "dur": 1.977, + "args": { + "External id": 989643,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7626, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940783780.906, "dur": 1902.148, + "args": { + "External id": 989644,"Sequence number": 10552521, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7627 + } + }, + { + "ph": "f", "id": 398, "pid": 2338706, "tid": 2379422, "ts": 6345940783780.906, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940783894.676, "dur": 108.698, + "args": { + "External id": 989645,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345940784109.342, "dur": 47.517, + "args": { + "External id": 989646,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345940784179.270, "dur": 62.942, + "args": { + "External id": 989647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940784254.333, "dur": 33.362, + "args": { + "External id": 989648,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940784296.964, "dur": 34.558, + "args": { + "External id": 989649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940784339.712, "dur": 27.859, + "args": { + "External id": 989650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940784375.738, "dur": 29.322, + "args": { + "External id": 989651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345940784433.147, "dur": 27.260, + "args": { + "External id": 989652,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345940784484.760, "dur": 31.923, + "args": { + "External id": 989653,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345940784538.935, "dur": 21.412, + "args": { + "External id": 989654,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345940784576.560, "dur": 19.382, + "args": { + "External id": 989655,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940784604.318, "dur": 35.978, + "args": { + "External id": 989656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940784644.075, "dur": 33.003, + "args": { + "External id": 989657,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940784710.175, "dur": 262.767, + "args": { + "External id": 989658,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940784794.878, "dur": 6.802, + "args": { + "External id": 989659,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940784803.790, "dur": 3.297, + "args": { + "External id": 989660,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940784808.194, "dur": 2.144, + "args": { + "External id": 989661,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940784811.471, "dur": 3.266, + "args": { + "External id": 989662,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940784859.978, "dur": 5.126, + "args": { + "External id": 989663,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940784862.057, "dur": 2.884, + "args": { + "External id": 989664,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940784866.893, "dur": 35.282, + "args": { + "External id": 989665,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940784872.862, "dur": 2.691, + "args": { + "External id": 989666,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940784903.729, "dur": 1.402, + "args": { + "External id": 989667,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940784904.518, "dur": 0.513, + "args": { + "External id": 989668,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940784905.851, "dur": 19.255, + "args": { + "External id": 989669,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940784908.657, "dur": 2.796, + "args": { + "External id": 989670,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345940785030.292, "dur": 70.228, + "args": { + "External id": 989671,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345940785125.550, "dur": 31.479, + "args": { + "External id": 989672,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940785174.751, "dur": 53.785, + "args": { + "External id": 989673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940785235.349, "dur": 42.744, + "args": { + "External id": 989674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940785288.284, "dur": 21.613, + "args": { + "External id": 989675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940785318.406, "dur": 32.781, + "args": { + "External id": 989676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940785357.903, "dur": 27.590, + "args": { + "External id": 989677,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940785391.782, "dur": 30.124, + "args": { + "External id": 989678,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345940785445.233, "dur": 28.690, + "args": { + "External id": 989679,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345940785493.844, "dur": 26.960, + "args": { + "External id": 989680,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345940785535.788, "dur": 21.025, + "args": { + "External id": 989681,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345940785576.789, "dur": 16.634, + "args": { + "External id": 989682,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345940785631.767, "dur": 20.013, + "args": { + "External id": 989683,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 7666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785737.194, "dur": 17.849, + "args": { + "External id": 989684,"Record function id": 0, "Ev Idx": 7667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785740.856, "dur": 13.138, + "args": { + "External id": 989685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785745.538, "dur": 7.393, + "args": { + "External id": 989686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785747.218, "dur": 5.615, + "args": { + "External id": 989687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785758.878, "dur": 4.935, + "args": { + "External id": 989688,"Record function id": 0, "Ev Idx": 7671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785760.609, "dur": 2.741, + "args": { + "External id": 989689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785761.398, "dur": 1.274, + "args": { + "External id": 989690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785761.746, "dur": 0.836, + "args": { + "External id": 989691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785767.098, "dur": 7.412, + "args": { + "External id": 989692,"Record function id": 0, "Ev Idx": 7675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785768.463, "dur": 5.561, + "args": { + "External id": 989693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785769.233, "dur": 4.147, + "args": { + "External id": 989694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785769.864, "dur": 3.454, + "args": { + "External id": 989695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785777.678, "dur": 71.387, + "args": { + "External id": 989696,"Record function id": 0, "Ev Idx": 7679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785844.933, "dur": 3.419, + "args": { + "External id": 989697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785846.103, "dur": 1.586, + "args": { + "External id": 989698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785846.691, "dur": 0.923, + "args": { + "External id": 989699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785852.705, "dur": 11.566, + "args": { + "External id": 989700,"Record function id": 0, "Ev Idx": 7683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785861.231, "dur": 2.607, + "args": { + "External id": 989701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785861.768, "dur": 1.418, + "args": { + "External id": 989702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785862.251, "dur": 0.858, + "args": { + "External id": 989703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785867.436, "dur": 6.304, + "args": { + "External id": 989704,"Record function id": 0, "Ev Idx": 7687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785868.656, "dur": 4.615, + "args": { + "External id": 989705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785869.191, "dur": 3.267, + "args": { + "External id": 989706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785871.687, "dur": 0.648, + "args": { + "External id": 989707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785877.443, "dur": 3.627, + "args": { + "External id": 989708,"Record function id": 0, "Ev Idx": 7691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785878.592, "dur": 2.055, + "args": { + "External id": 989709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785879.130, "dur": 1.076, + "args": { + "External id": 989710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785879.509, "dur": 0.604, + "args": { + "External id": 989711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785884.640, "dur": 4.671, + "args": { + "External id": 989712,"Record function id": 0, "Ev Idx": 7695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785886.076, "dur": 2.815, + "args": { + "External id": 989713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785886.640, "dur": 1.478, + "args": { + "External id": 989714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785887.179, "dur": 0.872, + "args": { + "External id": 989715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785892.640, "dur": 3.863, + "args": { + "External id": 989716,"Record function id": 0, "Ev Idx": 7699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940785893.853, "dur": 2.245, + "args": { + "External id": 989717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785894.459, "dur": 1.186, + "args": { + "External id": 989718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940785894.842, "dur": 0.729, + "args": { + "External id": 989719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940785900.723, "dur": 77983.715, + "args": { + "External id": 989720,"Record function id": 0, "Sequence number": 10552520, "Fwd thread id": 1, "Ev Idx": 7703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940785902.033, "dur": 77972.785, + "args": { + "External id": 989721,"Sequence number": 10552520, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7704 + } + }, + { + "ph": "f", "id": 399, "pid": 2338706, "tid": 2379422, "ts": 6345940785902.033, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.24)", "pid": 2338706, "tid": 2379422, + "ts": 6345940785937.080, "dur": 41.110, + "args": { + "External id": 989722,"Record function id": 0, "Ev Idx": 7705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.24)", "pid": 2338706, "tid": 2379422, + "ts": 6345940785986.722, "dur": 125.667, + "args": { + "External id": 989723,"Record function id": 0, "Ev Idx": 7706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.24)", "pid": 2338706, "tid": 2379422, + "ts": 6345940786120.684, "dur": 77746.465, + "args": { + "External id": 989724,"Record function id": 0, "Ev Idx": 7707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940786220.939, "dur": 8.647, + "args": { + "External id": 989725,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940786240.809, "dur": 7.349, + "args": { + "External id": 989726,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345940786266.022, "dur": 76580.336, + "args": { + "External id": 989727,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345940786281.029, "dur": 76551.704, + "args": { + "External id": 989728,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940786408.558, "dur": 19.935, + "args": { + "External id": 989729,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940786448.109, "dur": 76338.724, + "args": { + "External id": 989730,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 7713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940786451.084, "dur": 76334.506, + "args": { + "External id": 989731,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 7714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940786457.367, "dur": 8.640, + "args": { + "External id": 989732,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940786468.269, "dur": 76311.875, + "args": { + "External id": 989733,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 7716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940862958.193, "dur": 12.713, + "args": { + "External id": 989734,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 7717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940862962.030, "dur": 8.381, + "args": { + "External id": 989735,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345940863001.334, "dur": 421.958, + "args": { + "External id": 989736,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 7719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345940863050.165, "dur": 366.678, + "args": { + "External id": 989737,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7720, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345940863094.903, "dur": 313.936, + "args": { + "External id": 989738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 7721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345940863459.437, "dur": 3.797, + "args": { + "External id": 989739,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7722, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940863531.285, "dur": 7.399, + "args": { + "External id": 989740,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940863552.088, "dur": 37.896, + "args": { + "External id": 989741,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940863600.542, "dur": 1.393, + "args": { + "External id": 989742,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940863608.366, "dur": 13.812, + "args": { + "External id": 989743,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940863628.559, "dur": 0.945, + "args": { + "External id": 989744,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940863636.211, "dur": 12.571, + "args": { + "External id": 989745,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940863653.514, "dur": 0.862, + "args": { + "External id": 989746,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940863659.115, "dur": 12.718, + "args": { + "External id": 989747,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940863676.235, "dur": 3.426, + "args": { + "External id": 989748,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940863682.942, "dur": 12.339, + "args": { + "External id": 989749,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940863699.576, "dur": 1.281, + "args": { + "External id": 989750,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940863705.336, "dur": 12.100, + "args": { + "External id": 989751,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940863724.102, "dur": 0.845, + "args": { + "External id": 989752,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940863741.786, "dur": 13.370, + "args": { + "External id": 989753,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940863761.583, "dur": 0.813, + "args": { + "External id": 989754,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940863767.304, "dur": 11.605, + "args": { + "External id": 989755,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940863783.403, "dur": 0.852, + "args": { + "External id": 989756,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940863790.906, "dur": 13.679, + "args": { + "External id": 989757,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 7740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940863899.336, "dur": 2953.167, + "args": { + "External id": 989758,"Record function id": 0, "Ev Idx": 7741 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.23)", "pid": 2338706, "tid": 2379422, + "ts": 6345940863921.172, "dur": 1117.479, + "args": { + "External id": 989759,"Record function id": 0, "Ev Idx": 7742 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338706, "tid": 2379422, + "ts": 6345940863936.844, "dur": 391.214, + "args": { + "External id": 989760,"Record function id": 0, "Ev Idx": 7743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940864046.191, "dur": 5.005, + "args": { + "External id": 989761,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940864087.919, "dur": 1.484, + "args": { + "External id": 989762,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940864092.412, "dur": 1.108, + "args": { + "External id": 989763,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940864095.789, "dur": 3.015, + "args": { + "External id": 989764,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940864100.753, "dur": 0.994, + "args": { + "External id": 989765,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940864103.568, "dur": 0.668, + "args": { + "External id": 989766,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940864105.991, "dur": 0.690, + "args": { + "External id": 989767,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940864110.297, "dur": 0.929, + "args": { + "External id": 989768,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940864112.969, "dur": 0.841, + "args": { + "External id": 989769,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940864115.052, "dur": 0.647, + "args": { + "External id": 989770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940864134.255, "dur": 160.896, + "args": { + "External id": 989771,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940864153.421, "dur": 136.956, + "args": { + "External id": 989772,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940864169.795, "dur": 15.375, + "args": { + "External id": 989773,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940864192.283, "dur": 70.000, + "args": { + "External id": 989774,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 7757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940864195.062, "dur": 66.891, + "args": { + "External id": 989775,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 7758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864200.600, "dur": 7.670, + "args": { + "External id": 989776,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940864210.111, "dur": 51.262, + "args": { + "External id": 989777,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 7760 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.22", "pid": 2338706, "tid": 2379422, + "ts": 6345940864422.944, "dur": 605.043, + "args": { + "External id": 989778,"Record function id": 0, "Ev Idx": 7761 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338706, "tid": 2379422, + "ts": 6345940864442.689, "dur": 552.466, + "args": { + "External id": 989779,"Record function id": 0, "Ev Idx": 7762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940864507.949, "dur": 4.950, + "args": { + "External id": 989780,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345940864530.621, "dur": 30.719, + "args": { + "External id": 989781,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864535.345, "dur": 1.697, + "args": { + "External id": 989782,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864539.060, "dur": 1.645, + "args": { + "External id": 989783,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864541.617, "dur": 0.508, + "args": { + "External id": 989784,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864543.356, "dur": 0.415, + "args": { + "External id": 989785,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864546.531, "dur": 0.270, + "args": { + "External id": 989786,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864548.134, "dur": 0.357, + "args": { + "External id": 989787,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864549.425, "dur": 2.824, + "args": { + "External id": 989788,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864556.128, "dur": 0.545, + "args": { + "External id": 989789,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864557.974, "dur": 0.355, + "args": { + "External id": 989790,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940864571.963, "dur": 42.299, + "args": { + "External id": 989791,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345940864645.120, "dur": 110.750, + "args": { + "External id": 989792,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940864655.116, "dur": 3.451, + "args": { + "External id": 989793,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345940864664.103, "dur": 10.293, + "args": { + "External id": 989794,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345940864668.525, "dur": 5.456, + "args": { + "External id": 989795,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864671.977, "dur": 0.792, + "args": { + "External id": 989796,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345940864681.438, "dur": 24.654, + "args": { + "External id": 989797,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864683.533, "dur": 0.487, + "args": { + "External id": 989798,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864686.587, "dur": 0.436, + "args": { + "External id": 989799,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864688.163, "dur": 0.329, + "args": { + "External id": 989800,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864689.678, "dur": 3.714, + "args": { + "External id": 989801,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864694.720, "dur": 0.336, + "args": { + "External id": 989802,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864696.360, "dur": 0.345, + "args": { + "External id": 989803,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864699.563, "dur": 0.410, + "args": { + "External id": 989804,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864701.179, "dur": 0.307, + "args": { + "External id": 989805,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940864702.894, "dur": 0.209, + "args": { + "External id": 989806,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940864717.599, "dur": 30.904, + "args": { + "External id": 989807,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345940864799.876, "dur": 122.190, + "args": { + "External id": 989808,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345940864829.656, "dur": 89.060, + "args": { + "External id": 989809,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7792, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345940864841.192, "dur": 73.247, + "args": { + "External id": 989810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345940864940.509, "dur": 1.998, + "args": { + "External id": 989811,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7794, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940865046.815, "dur": 1778.920, + "args": { + "External id": 989812,"Sequence number": 10552519, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7795 + } + }, + { + "ph": "f", "id": 400, "pid": 2338706, "tid": 2379422, "ts": 6345940865046.815, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940865202.853, "dur": 114.111, + "args": { + "External id": 989813,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345940865365.819, "dur": 40.552, + "args": { + "External id": 989814,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345940865424.609, "dur": 49.434, + "args": { + "External id": 989815,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940865484.140, "dur": 31.592, + "args": { + "External id": 989816,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940865524.270, "dur": 32.246, + "args": { + "External id": 989817,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940865562.948, "dur": 27.373, + "args": { + "External id": 989818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940865598.012, "dur": 28.191, + "args": { + "External id": 989819,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345940865650.310, "dur": 24.249, + "args": { + "External id": 989820,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345940865695.662, "dur": 28.974, + "args": { + "External id": 989821,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345940865747.443, "dur": 17.688, + "args": { + "External id": 989822,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345940865779.993, "dur": 15.899, + "args": { + "External id": 989823,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940865803.756, "dur": 34.709, + "args": { + "External id": 989824,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940865842.100, "dur": 33.283, + "args": { + "External id": 989825,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940865906.668, "dur": 315.433, + "args": { + "External id": 989826,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940865988.065, "dur": 6.254, + "args": { + "External id": 989827,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940865996.469, "dur": 3.170, + "args": { + "External id": 989828,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940866000.615, "dur": 2.071, + "args": { + "External id": 989829,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940866003.689, "dur": 2.678, + "args": { + "External id": 989830,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940866105.376, "dur": 7.776, + "args": { + "External id": 989831,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940866109.796, "dur": 2.987, + "args": { + "External id": 989832,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940866114.872, "dur": 35.471, + "args": { + "External id": 989833,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940866120.974, "dur": 2.913, + "args": { + "External id": 989834,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940866151.750, "dur": 1.550, + "args": { + "External id": 989835,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940866152.701, "dur": 0.499, + "args": { + "External id": 989836,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940866154.141, "dur": 18.875, + "args": { + "External id": 989837,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940866156.348, "dur": 2.934, + "args": { + "External id": 989838,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345940866266.657, "dur": 28.333, + "args": { + "External id": 989839,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345940866316.268, "dur": 18.494, + "args": { + "External id": 989840,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940866341.786, "dur": 64.391, + "args": { + "External id": 989841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940866419.106, "dur": 43.493, + "args": { + "External id": 989842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940866474.804, "dur": 24.412, + "args": { + "External id": 989843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940866504.938, "dur": 30.763, + "args": { + "External id": 989844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940866546.231, "dur": 30.000, + "args": { + "External id": 989845,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940866583.615, "dur": 29.407, + "args": { + "External id": 989846,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345940866635.755, "dur": 24.819, + "args": { + "External id": 989847,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345940866678.532, "dur": 23.506, + "args": { + "External id": 989848,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345940866718.789, "dur": 17.819, + "args": { + "External id": 989849,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345940866752.242, "dur": 12.157, + "args": { + "External id": 989850,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345940866780.452, "dur": 14.681, + "args": { + "External id": 989851,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 7834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866875.984, "dur": 16.495, + "args": { + "External id": 989852,"Record function id": 0, "Ev Idx": 7835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866879.392, "dur": 12.102, + "args": { + "External id": 989853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866884.061, "dur": 6.263, + "args": { + "External id": 989854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866885.559, "dur": 4.665, + "args": { + "External id": 989855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866896.449, "dur": 5.079, + "args": { + "External id": 989856,"Record function id": 0, "Ev Idx": 7839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866898.035, "dur": 2.970, + "args": { + "External id": 989857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866898.825, "dur": 1.652, + "args": { + "External id": 989858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866899.322, "dur": 1.030, + "args": { + "External id": 989859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866904.750, "dur": 7.282, + "args": { + "External id": 989860,"Record function id": 0, "Ev Idx": 7843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866906.153, "dur": 5.441, + "args": { + "External id": 989861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866906.851, "dur": 4.191, + "args": { + "External id": 989862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866907.666, "dur": 3.258, + "args": { + "External id": 989863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866915.264, "dur": 4.437, + "args": { + "External id": 989864,"Record function id": 0, "Ev Idx": 7847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866916.814, "dur": 2.423, + "args": { + "External id": 989865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866917.496, "dur": 1.342, + "args": { + "External id": 989866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866917.950, "dur": 0.824, + "args": { + "External id": 989867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866922.776, "dur": 3.901, + "args": { + "External id": 989868,"Record function id": 0, "Ev Idx": 7851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866924.044, "dur": 2.194, + "args": { + "External id": 989869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866924.648, "dur": 1.154, + "args": { + "External id": 989870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866925.097, "dur": 0.628, + "args": { + "External id": 989871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866929.802, "dur": 3.945, + "args": { + "External id": 989872,"Record function id": 0, "Ev Idx": 7855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866931.045, "dur": 2.264, + "args": { + "External id": 989873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866931.560, "dur": 1.106, + "args": { + "External id": 989874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866931.985, "dur": 0.608, + "args": { + "External id": 989875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866936.970, "dur": 6.455, + "args": { + "External id": 989876,"Record function id": 0, "Ev Idx": 7859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866938.525, "dur": 4.474, + "args": { + "External id": 989877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866938.993, "dur": 3.579, + "args": { + "External id": 989878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866941.921, "dur": 0.543, + "args": { + "External id": 989879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866946.570, "dur": 4.180, + "args": { + "External id": 989880,"Record function id": 0, "Ev Idx": 7863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866947.825, "dur": 2.497, + "args": { + "External id": 989881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866948.435, "dur": 1.264, + "args": { + "External id": 989882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866948.723, "dur": 0.881, + "args": { + "External id": 989883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866953.739, "dur": 3.613, + "args": { + "External id": 989884,"Record function id": 0, "Ev Idx": 7867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940866954.861, "dur": 2.053, + "args": { + "External id": 989885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866955.439, "dur": 1.045, + "args": { + "External id": 989886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940866955.747, "dur": 0.665, + "args": { + "External id": 989887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940866962.081, "dur": 73003.727, + "args": { + "External id": 989888,"Record function id": 0, "Sequence number": 10552518, "Fwd thread id": 1, "Ev Idx": 7871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940866963.654, "dur": 72991.732, + "args": { + "External id": 989889,"Sequence number": 10552518, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7872 + } + }, + { + "ph": "f", "id": 401, "pid": 2338706, "tid": 2379422, "ts": 6345940866963.654, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.23)", "pid": 2338706, "tid": 2379422, + "ts": 6345940866994.177, "dur": 91.480, + "args": { + "External id": 989890,"Record function id": 0, "Ev Idx": 7873 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.23)", "pid": 2338706, "tid": 2379422, + "ts": 6345940867097.347, "dur": 71.263, + "args": { + "External id": 989891,"Record function id": 0, "Ev Idx": 7874 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.23)", "pid": 2338706, "tid": 2379422, + "ts": 6345940867174.381, "dur": 72773.049, + "args": { + "External id": 989892,"Record function id": 0, "Ev Idx": 7875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940867271.275, "dur": 7.982, + "args": { + "External id": 989893,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940867290.552, "dur": 6.918, + "args": { + "External id": 989894,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345940867311.806, "dur": 71636.371, + "args": { + "External id": 989895,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345940867328.285, "dur": 71601.750, + "args": { + "External id": 989896,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940867423.213, "dur": 19.035, + "args": { + "External id": 989897,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940867461.891, "dur": 71416.730, + "args": { + "External id": 989898,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 7881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940867464.646, "dur": 71412.605, + "args": { + "External id": 989899,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 7882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940867469.975, "dur": 10.179, + "args": { + "External id": 989900,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940867482.187, "dur": 71389.903, + "args": { + "External id": 989901,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 7884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940939111.858, "dur": 14.162, + "args": { + "External id": 989902,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 7885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940939115.660, "dur": 9.588, + "args": { + "External id": 989903,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345940939156.949, "dur": 378.750, + "args": { + "External id": 989904,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 7887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345940939191.997, "dur": 338.799, + "args": { + "External id": 989905,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7888, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345940939207.007, "dur": 318.071, + "args": { + "External id": 989906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 7889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345940939558.348, "dur": 2.148, + "args": { + "External id": 989907,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7890, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940939623.081, "dur": 7.009, + "args": { + "External id": 989908,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940939642.911, "dur": 36.108, + "args": { + "External id": 989909,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940939689.026, "dur": 1.651, + "args": { + "External id": 989910,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940939696.754, "dur": 14.140, + "args": { + "External id": 989911,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940939717.201, "dur": 1.207, + "args": { + "External id": 989912,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940939738.956, "dur": 13.696, + "args": { + "External id": 989913,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940939759.778, "dur": 0.973, + "args": { + "External id": 989914,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940939765.326, "dur": 11.671, + "args": { + "External id": 989915,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940939781.478, "dur": 3.106, + "args": { + "External id": 989916,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940939788.659, "dur": 10.740, + "args": { + "External id": 989917,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940939803.586, "dur": 2.048, + "args": { + "External id": 989918,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940939809.869, "dur": 17.123, + "args": { + "External id": 989919,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940939833.760, "dur": 0.969, + "args": { + "External id": 989920,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940939838.721, "dur": 10.293, + "args": { + "External id": 989921,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940939852.893, "dur": 0.808, + "args": { + "External id": 989922,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940939857.715, "dur": 10.695, + "args": { + "External id": 989923,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940939872.528, "dur": 0.853, + "args": { + "External id": 989924,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345940939877.088, "dur": 8.849, + "args": { + "External id": 989925,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 7908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940939980.360, "dur": 3023.557, + "args": { + "External id": 989926,"Record function id": 0, "Ev Idx": 7909 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.22)", "pid": 2338706, "tid": 2379422, + "ts": 6345940940004.262, "dur": 1155.110, + "args": { + "External id": 989927,"Record function id": 0, "Ev Idx": 7910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338706, "tid": 2379422, + "ts": 6345940940042.624, "dur": 374.523, + "args": { + "External id": 989928,"Record function id": 0, "Ev Idx": 7911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940940171.250, "dur": 5.532, + "args": { + "External id": 989929,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940940180.573, "dur": 1.084, + "args": { + "External id": 989930,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940940183.787, "dur": 0.995, + "args": { + "External id": 989931,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940940186.552, "dur": 3.158, + "args": { + "External id": 989932,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940940193.670, "dur": 1.258, + "args": { + "External id": 989933,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940940196.575, "dur": 0.713, + "args": { + "External id": 989934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940940198.979, "dur": 0.854, + "args": { + "External id": 989935,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940940201.910, "dur": 1.049, + "args": { + "External id": 989936,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940940206.207, "dur": 0.933, + "args": { + "External id": 989937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940940209.033, "dur": 0.653, + "args": { + "External id": 989938,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940940228.632, "dur": 155.510, + "args": { + "External id": 989939,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940940246.396, "dur": 133.165, + "args": { + "External id": 989940,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940940262.497, "dur": 15.978, + "args": { + "External id": 989941,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940940282.634, "dur": 70.985, + "args": { + "External id": 989942,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 7925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940940287.909, "dur": 65.381, + "args": { + "External id": 989943,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 7926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940292.235, "dur": 7.811, + "args": { + "External id": 989944,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940940301.716, "dur": 51.041, + "args": { + "External id": 989945,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 7928 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.21", "pid": 2338706, "tid": 2379422, + "ts": 6345940940506.759, "dur": 644.986, + "args": { + "External id": 989946,"Record function id": 0, "Ev Idx": 7929 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338706, "tid": 2379422, + "ts": 6345940940525.041, "dur": 611.632, + "args": { + "External id": 989947,"Record function id": 0, "Ev Idx": 7930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940940585.837, "dur": 5.018, + "args": { + "External id": 989948,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345940940608.055, "dur": 31.912, + "args": { + "External id": 989949,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940613.424, "dur": 2.440, + "args": { + "External id": 989950,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940617.753, "dur": 0.413, + "args": { + "External id": 989951,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940619.700, "dur": 0.341, + "args": { + "External id": 989952,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940622.776, "dur": 0.359, + "args": { + "External id": 989953,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940624.566, "dur": 0.606, + "args": { + "External id": 989954,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940626.521, "dur": 0.346, + "args": { + "External id": 989955,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940629.890, "dur": 2.471, + "args": { + "External id": 989956,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940633.574, "dur": 0.505, + "args": { + "External id": 989957,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940635.431, "dur": 1.501, + "args": { + "External id": 989958,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940940655.139, "dur": 41.671, + "args": { + "External id": 989959,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345940940726.989, "dur": 107.734, + "args": { + "External id": 989960,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940940737.434, "dur": 4.337, + "args": { + "External id": 989961,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345940940747.283, "dur": 10.581, + "args": { + "External id": 989962,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345940940751.835, "dur": 5.594, + "args": { + "External id": 989963,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940755.404, "dur": 0.624, + "args": { + "External id": 989964,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345940940764.623, "dur": 23.975, + "args": { + "External id": 989965,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940766.428, "dur": 0.368, + "args": { + "External id": 989966,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940768.328, "dur": 1.930, + "args": { + "External id": 989967,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940771.654, "dur": 0.628, + "args": { + "External id": 989968,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940773.318, "dur": 2.746, + "args": { + "External id": 989969,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940778.843, "dur": 0.301, + "args": { + "External id": 989970,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940780.364, "dur": 0.292, + "args": { + "External id": 989971,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940781.762, "dur": 0.402, + "args": { + "External id": 989972,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940784.119, "dur": 0.341, + "args": { + "External id": 989973,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940940785.470, "dur": 0.314, + "args": { + "External id": 989974,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940940798.454, "dur": 28.628, + "args": { + "External id": 989975,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345940940877.993, "dur": 121.485, + "args": { + "External id": 989976,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345940940910.173, "dur": 85.911, + "args": { + "External id": 989977,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7960, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345940940919.458, "dur": 72.354, + "args": { + "External id": 989978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345940941037.131, "dur": 2.899, + "args": { + "External id": 989979,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7962, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940941211.380, "dur": 1766.257, + "args": { + "External id": 989980,"Sequence number": 10552517, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7963 + } + }, + { + "ph": "f", "id": 402, "pid": 2338706, "tid": 2379422, "ts": 6345940941211.380, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940941331.116, "dur": 114.096, + "args": { + "External id": 989981,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345940941495.940, "dur": 43.327, + "args": { + "External id": 989982,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345940941557.494, "dur": 50.617, + "args": { + "External id": 989983,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940941619.237, "dur": 32.217, + "args": { + "External id": 989984,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940941660.450, "dur": 32.884, + "args": { + "External id": 989985,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940941699.897, "dur": 26.344, + "args": { + "External id": 989986,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940941732.611, "dur": 29.294, + "args": { + "External id": 989987,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345940941790.196, "dur": 24.575, + "args": { + "External id": 989988,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345940941839.207, "dur": 30.975, + "args": { + "External id": 989989,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345940941892.062, "dur": 19.497, + "args": { + "External id": 989990,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345940941928.947, "dur": 13.984, + "args": { + "External id": 989991,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940941950.441, "dur": 34.825, + "args": { + "External id": 989992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940941988.981, "dur": 50.554, + "args": { + "External id": 989993,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345940942110.701, "dur": 256.418, + "args": { + "External id": 989994,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940942196.981, "dur": 6.689, + "args": { + "External id": 989995,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940942205.749, "dur": 2.822, + "args": { + "External id": 989996,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940942210.002, "dur": 2.612, + "args": { + "External id": 989997,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940942213.516, "dur": 3.177, + "args": { + "External id": 989998,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940942263.349, "dur": 5.280, + "args": { + "External id": 989999,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940942265.765, "dur": 2.654, + "args": { + "External id": 990000,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940942270.775, "dur": 30.836, + "args": { + "External id": 990001,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940942276.501, "dur": 1.556, + "args": { + "External id": 990002,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345940942303.409, "dur": 1.427, + "args": { + "External id": 990003,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940942304.240, "dur": 0.521, + "args": { + "External id": 990004,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345940942308.143, "dur": 16.651, + "args": { + "External id": 990005,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940942310.062, "dur": 3.000, + "args": { + "External id": 990006,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345940942409.186, "dur": 28.471, + "args": { + "External id": 990007,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345940942456.964, "dur": 17.709, + "args": { + "External id": 990008,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940942482.854, "dur": 61.762, + "args": { + "External id": 990009,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940942557.057, "dur": 45.359, + "args": { + "External id": 990010,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940942614.683, "dur": 23.420, + "args": { + "External id": 990011,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940942644.074, "dur": 30.434, + "args": { + "External id": 990012,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940942681.604, "dur": 31.793, + "args": { + "External id": 990013,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345940942719.894, "dur": 30.196, + "args": { + "External id": 990014,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345940942774.815, "dur": 25.120, + "args": { + "External id": 990015,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345940942819.176, "dur": 26.605, + "args": { + "External id": 990016,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345940942864.208, "dur": 19.223, + "args": { + "External id": 990017,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345940942901.415, "dur": 14.531, + "args": { + "External id": 990018,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345940942929.378, "dur": 17.685, + "args": { + "External id": 990019,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943046.962, "dur": 50.215, + "args": { + "External id": 990020,"Record function id": 0, "Ev Idx": 8003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943051.093, "dur": 44.330, + "args": { + "External id": 990021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943087.907, "dur": 6.089, + "args": { + "External id": 990022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943089.358, "dur": 4.350, + "args": { + "External id": 990023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943103.455, "dur": 5.733, + "args": { + "External id": 990024,"Record function id": 0, "Ev Idx": 8007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943105.487, "dur": 3.222, + "args": { + "External id": 990025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943106.478, "dur": 1.607, + "args": { + "External id": 990026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943107.059, "dur": 0.917, + "args": { + "External id": 990027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943112.524, "dur": 6.687, + "args": { + "External id": 990028,"Record function id": 0, "Ev Idx": 8011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943114.018, "dur": 4.714, + "args": { + "External id": 990029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943114.613, "dur": 3.555, + "args": { + "External id": 990030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943115.261, "dur": 2.842, + "args": { + "External id": 990031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943122.459, "dur": 4.732, + "args": { + "External id": 990032,"Record function id": 0, "Ev Idx": 8015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943124.350, "dur": 2.324, + "args": { + "External id": 990033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943125.025, "dur": 1.213, + "args": { + "External id": 990034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943125.349, "dur": 0.814, + "args": { + "External id": 990035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943130.310, "dur": 4.219, + "args": { + "External id": 990036,"Record function id": 0, "Ev Idx": 8019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943131.465, "dur": 2.624, + "args": { + "External id": 990037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943132.140, "dur": 1.343, + "args": { + "External id": 990038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943132.471, "dur": 0.945, + "args": { + "External id": 990039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943137.599, "dur": 6.489, + "args": { + "External id": 990040,"Record function id": 0, "Ev Idx": 8023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943138.743, "dur": 4.859, + "args": { + "External id": 990041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943139.425, "dur": 3.293, + "args": { + "External id": 990042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943141.959, "dur": 0.646, + "args": { + "External id": 990043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943147.368, "dur": 4.057, + "args": { + "External id": 990044,"Record function id": 0, "Ev Idx": 8027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943148.666, "dur": 2.349, + "args": { + "External id": 990045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943149.245, "dur": 1.179, + "args": { + "External id": 990046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943149.705, "dur": 0.652, + "args": { + "External id": 990047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943154.508, "dur": 4.136, + "args": { + "External id": 990048,"Record function id": 0, "Ev Idx": 8031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943156.038, "dur": 2.201, + "args": { + "External id": 990049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943156.505, "dur": 1.029, + "args": { + "External id": 990050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943156.838, "dur": 0.629, + "args": { + "External id": 990051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943161.688, "dur": 4.170, + "args": { + "External id": 990052,"Record function id": 0, "Ev Idx": 8035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345940943162.827, "dur": 2.626, + "args": { + "External id": 990053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943163.535, "dur": 1.206, + "args": { + "External id": 990054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345940943164.072, "dur": 0.593, + "args": { + "External id": 990055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940943170.464, "dur": 72963.913, + "args": { + "External id": 990056,"Record function id": 0, "Sequence number": 10552516, "Fwd thread id": 1, "Ev Idx": 8039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345940943171.896, "dur": 72951.273, + "args": { + "External id": 990057,"Sequence number": 10552516, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8040 + } + }, + { + "ph": "f", "id": 403, "pid": 2338706, "tid": 2379422, "ts": 6345940943171.896, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.22)", "pid": 2338706, "tid": 2379422, + "ts": 6345940943207.490, "dur": 42.050, + "args": { + "External id": 990058,"Record function id": 0, "Ev Idx": 8041 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.22)", "pid": 2338706, "tid": 2379422, + "ts": 6345940943258.032, "dur": 67.291, + "args": { + "External id": 990059,"Record function id": 0, "Ev Idx": 8042 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.22)", "pid": 2338706, "tid": 2379422, + "ts": 6345940943331.744, "dur": 72781.013, + "args": { + "External id": 990060,"Record function id": 0, "Ev Idx": 8043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940943426.232, "dur": 7.792, + "args": { + "External id": 990061,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345940943443.693, "dur": 7.114, + "args": { + "External id": 990062,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345940943468.216, "dur": 71611.618, + "args": { + "External id": 990063,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345940943482.779, "dur": 71560.564, + "args": { + "External id": 990064,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345940943574.375, "dur": 18.992, + "args": { + "External id": 990065,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345940943612.476, "dur": 71370.717, + "args": { + "External id": 990066,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345940943617.254, "dur": 71364.781, + "args": { + "External id": 990067,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345940943621.346, "dur": 8.647, + "args": { + "External id": 990068,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345940943649.547, "dur": 71327.366, + "args": { + "External id": 990069,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941015198.301, "dur": 13.504, + "args": { + "External id": 990070,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941015201.924, "dur": 9.510, + "args": { + "External id": 990071,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941015246.262, "dur": 419.569, + "args": { + "External id": 990072,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941015279.607, "dur": 381.025, + "args": { + "External id": 990073,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8056, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941015292.360, "dur": 362.659, + "args": { + "External id": 990074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941015691.049, "dur": 2.378, + "args": { + "External id": 990075,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8058, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941015756.267, "dur": 7.140, + "args": { + "External id": 990076,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941015776.229, "dur": 36.754, + "args": { + "External id": 990077,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941015823.792, "dur": 1.585, + "args": { + "External id": 990078,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941015831.208, "dur": 12.185, + "args": { + "External id": 990079,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941015849.269, "dur": 1.177, + "args": { + "External id": 990080,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941015854.919, "dur": 11.128, + "args": { + "External id": 990081,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941015871.513, "dur": 1.131, + "args": { + "External id": 990082,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941015876.920, "dur": 11.096, + "args": { + "External id": 990083,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941015892.625, "dur": 3.415, + "args": { + "External id": 990084,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941015900.247, "dur": 10.067, + "args": { + "External id": 990085,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941015914.480, "dur": 1.361, + "args": { + "External id": 990086,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941015922.267, "dur": 11.600, + "args": { + "External id": 990087,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941015938.356, "dur": 0.773, + "args": { + "External id": 990088,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941015943.004, "dur": 10.638, + "args": { + "External id": 990089,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941015958.282, "dur": 0.943, + "args": { + "External id": 990090,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941015963.785, "dur": 11.238, + "args": { + "External id": 990091,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941015981.658, "dur": 0.749, + "args": { + "External id": 990092,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941015986.428, "dur": 10.406, + "args": { + "External id": 990093,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941016153.226, "dur": 2975.590, + "args": { + "External id": 990094,"Record function id": 0, "Ev Idx": 8077 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.21)", "pid": 2338706, "tid": 2379422, + "ts": 6345941016175.714, "dur": 1108.951, + "args": { + "External id": 990095,"Record function id": 0, "Ev Idx": 8078 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338706, "tid": 2379422, + "ts": 6345941016193.481, "dur": 339.889, + "args": { + "External id": 990096,"Record function id": 0, "Ev Idx": 8079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941016286.909, "dur": 5.356, + "args": { + "External id": 990097,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941016295.814, "dur": 0.970, + "args": { + "External id": 990098,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941016298.917, "dur": 0.873, + "args": { + "External id": 990099,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941016301.393, "dur": 2.647, + "args": { + "External id": 990100,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941016305.626, "dur": 1.649, + "args": { + "External id": 990101,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941016308.765, "dur": 0.666, + "args": { + "External id": 990102,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941016311.556, "dur": 0.851, + "args": { + "External id": 990103,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941016316.608, "dur": 1.091, + "args": { + "External id": 990104,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941016319.310, "dur": 0.970, + "args": { + "External id": 990105,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941016321.618, "dur": 1.145, + "args": { + "External id": 990106,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941016340.826, "dur": 162.236, + "args": { + "External id": 990107,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941016359.680, "dur": 138.623, + "args": { + "External id": 990108,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941016380.690, "dur": 16.035, + "args": { + "External id": 990109,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941016400.549, "dur": 70.323, + "args": { + "External id": 990110,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941016403.441, "dur": 67.028, + "args": { + "External id": 990111,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016407.002, "dur": 8.686, + "args": { + "External id": 990112,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941016417.285, "dur": 52.176, + "args": { + "External id": 990113,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.20", "pid": 2338706, "tid": 2379422, + "ts": 6345941016621.789, "dur": 655.246, + "args": { + "External id": 990114,"Record function id": 0, "Ev Idx": 8097 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338706, "tid": 2379422, + "ts": 6345941016637.891, "dur": 624.843, + "args": { + "External id": 990115,"Record function id": 0, "Ev Idx": 8098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941016697.766, "dur": 4.650, + "args": { + "External id": 990116,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941016719.891, "dur": 30.272, + "args": { + "External id": 990117,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016724.703, "dur": 1.665, + "args": { + "External id": 990118,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016728.852, "dur": 1.784, + "args": { + "External id": 990119,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016732.239, "dur": 0.316, + "args": { + "External id": 990120,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016733.890, "dur": 0.429, + "args": { + "External id": 990121,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016737.035, "dur": 0.323, + "args": { + "External id": 990122,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016738.565, "dur": 0.311, + "args": { + "External id": 990123,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016739.865, "dur": 2.554, + "args": { + "External id": 990124,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016744.909, "dur": 0.304, + "args": { + "External id": 990125,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016746.525, "dur": 0.259, + "args": { + "External id": 990126,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941016760.703, "dur": 39.153, + "args": { + "External id": 990127,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345941016831.030, "dur": 109.287, + "args": { + "External id": 990128,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941016841.049, "dur": 3.441, + "args": { + "External id": 990129,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345941016849.815, "dur": 9.869, + "args": { + "External id": 990130,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345941016854.003, "dur": 5.289, + "args": { + "External id": 990131,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016857.323, "dur": 0.667, + "args": { + "External id": 990132,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941016866.455, "dur": 25.604, + "args": { + "External id": 990133,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016868.561, "dur": 0.718, + "args": { + "External id": 990134,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016871.298, "dur": 0.432, + "args": { + "External id": 990135,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016873.347, "dur": 0.338, + "args": { + "External id": 990136,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016875.005, "dur": 4.025, + "args": { + "External id": 990137,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016880.490, "dur": 0.428, + "args": { + "External id": 990138,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016882.150, "dur": 0.399, + "args": { + "External id": 990139,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016885.403, "dur": 0.340, + "args": { + "External id": 990140,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016887.053, "dur": 0.352, + "args": { + "External id": 990141,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941016888.750, "dur": 0.323, + "args": { + "External id": 990142,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941016903.903, "dur": 29.074, + "args": { + "External id": 990143,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941016984.595, "dur": 187.236, + "args": { + "External id": 990144,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941017035.639, "dur": 131.625, + "args": { + "External id": 990145,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8128, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941017048.479, "dur": 114.214, + "args": { + "External id": 990146,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941017197.035, "dur": 2.042, + "args": { + "External id": 990147,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8130, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941017292.990, "dur": 1809.607, + "args": { + "External id": 990148,"Sequence number": 10552515, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8131 + } + }, + { + "ph": "f", "id": 404, "pid": 2338706, "tid": 2379422, "ts": 6345941017292.990, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941017411.150, "dur": 108.093, + "args": { + "External id": 990149,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941017572.681, "dur": 42.585, + "args": { + "External id": 990150,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345941017633.093, "dur": 48.848, + "args": { + "External id": 990151,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941017692.543, "dur": 32.096, + "args": { + "External id": 990152,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941017733.233, "dur": 31.291, + "args": { + "External id": 990153,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941017770.497, "dur": 26.799, + "args": { + "External id": 990154,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941017803.596, "dur": 27.999, + "args": { + "External id": 990155,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941017858.987, "dur": 22.757, + "args": { + "External id": 990156,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941017904.311, "dur": 28.804, + "args": { + "External id": 990157,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941017957.319, "dur": 19.385, + "args": { + "External id": 990158,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941017991.508, "dur": 14.575, + "args": { + "External id": 990159,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941018034.973, "dur": 76.437, + "args": { + "External id": 990160,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941018118.473, "dur": 35.820, + "args": { + "External id": 990161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345941018188.884, "dur": 254.458, + "args": { + "External id": 990162,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941018272.156, "dur": 6.285, + "args": { + "External id": 990163,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941018280.469, "dur": 3.070, + "args": { + "External id": 990164,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941018285.219, "dur": 2.721, + "args": { + "External id": 990165,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941018289.040, "dur": 3.668, + "args": { + "External id": 990166,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941018336.882, "dur": 7.587, + "args": { + "External id": 990167,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941018341.563, "dur": 2.685, + "args": { + "External id": 990168,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941018346.498, "dur": 30.266, + "args": { + "External id": 990169,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941018352.128, "dur": 1.693, + "args": { + "External id": 990170,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941018378.352, "dur": 1.769, + "args": { + "External id": 990171,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941018379.480, "dur": 0.540, + "args": { + "External id": 990172,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941018381.344, "dur": 16.604, + "args": { + "External id": 990173,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941018383.815, "dur": 2.741, + "args": { + "External id": 990174,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941018485.363, "dur": 28.452, + "args": { + "External id": 990175,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941018534.887, "dur": 16.322, + "args": { + "External id": 990176,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941018560.434, "dur": 54.842, + "args": { + "External id": 990177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941018627.959, "dur": 46.061, + "args": { + "External id": 990178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941018685.924, "dur": 23.183, + "args": { + "External id": 990179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941018714.987, "dur": 30.093, + "args": { + "External id": 990180,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941018752.636, "dur": 32.830, + "args": { + "External id": 990181,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941018791.486, "dur": 30.804, + "args": { + "External id": 990182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345941018846.246, "dur": 29.274, + "args": { + "External id": 990183,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941018893.877, "dur": 26.312, + "args": { + "External id": 990184,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941018938.807, "dur": 16.686, + "args": { + "External id": 990185,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941018973.461, "dur": 12.773, + "args": { + "External id": 990186,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345941019001.089, "dur": 34.740, + "args": { + "External id": 990187,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019153.260, "dur": 18.182, + "args": { + "External id": 990188,"Record function id": 0, "Ev Idx": 8171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019158.452, "dur": 11.951, + "args": { + "External id": 990189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019163.408, "dur": 6.063, + "args": { + "External id": 990190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019165.140, "dur": 4.219, + "args": { + "External id": 990191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019175.491, "dur": 5.139, + "args": { + "External id": 990192,"Record function id": 0, "Ev Idx": 8175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019177.185, "dur": 2.983, + "args": { + "External id": 990193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019178.012, "dur": 1.644, + "args": { + "External id": 990194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019178.517, "dur": 1.053, + "args": { + "External id": 990195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019183.833, "dur": 6.267, + "args": { + "External id": 990196,"Record function id": 0, "Ev Idx": 8179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019185.289, "dur": 4.347, + "args": { + "External id": 990197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019185.980, "dur": 3.089, + "args": { + "External id": 990198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019186.686, "dur": 2.267, + "args": { + "External id": 990199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019193.310, "dur": 4.800, + "args": { + "External id": 990200,"Record function id": 0, "Ev Idx": 8183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019194.520, "dur": 3.159, + "args": { + "External id": 990201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019195.226, "dur": 1.834, + "args": { + "External id": 990202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019195.880, "dur": 1.119, + "args": { + "External id": 990203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019201.337, "dur": 3.957, + "args": { + "External id": 990204,"Record function id": 0, "Ev Idx": 8187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019202.436, "dur": 2.455, + "args": { + "External id": 990205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019203.010, "dur": 1.187, + "args": { + "External id": 990206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019203.437, "dur": 0.686, + "args": { + "External id": 990207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019208.468, "dur": 75.839, + "args": { + "External id": 990208,"Record function id": 0, "Ev Idx": 8191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019278.498, "dur": 5.226, + "args": { + "External id": 990209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019279.714, "dur": 3.450, + "args": { + "External id": 990210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019282.171, "dur": 0.763, + "args": { + "External id": 990211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019293.807, "dur": 3.839, + "args": { + "External id": 990212,"Record function id": 0, "Ev Idx": 8195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019295.117, "dur": 1.979, + "args": { + "External id": 990213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019295.689, "dur": 0.989, + "args": { + "External id": 990214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019296.042, "dur": 0.550, + "args": { + "External id": 990215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019301.149, "dur": 4.608, + "args": { + "External id": 990216,"Record function id": 0, "Ev Idx": 8199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019302.660, "dur": 2.611, + "args": { + "External id": 990217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019303.164, "dur": 1.398, + "args": { + "External id": 990218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019303.797, "dur": 0.693, + "args": { + "External id": 990219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019309.333, "dur": 3.911, + "args": { + "External id": 990220,"Record function id": 0, "Ev Idx": 8203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941019310.518, "dur": 2.314, + "args": { + "External id": 990221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019310.986, "dur": 1.263, + "args": { + "External id": 990222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941019311.516, "dur": 0.668, + "args": { + "External id": 990223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941019317.816, "dur": 66428.313, + "args": { + "External id": 990224,"Record function id": 0, "Sequence number": 10552514, "Fwd thread id": 1, "Ev Idx": 8207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941019319.442, "dur": 66416.966, + "args": { + "External id": 990225,"Sequence number": 10552514, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8208 + } + }, + { + "ph": "f", "id": 405, "pid": 2338706, "tid": 2379422, "ts": 6345941019319.442, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.21)", "pid": 2338706, "tid": 2379422, + "ts": 6345941019353.168, "dur": 42.312, + "args": { + "External id": 990226,"Record function id": 0, "Ev Idx": 8209 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.21)", "pid": 2338706, "tid": 2379422, + "ts": 6345941019403.486, "dur": 69.450, + "args": { + "External id": 990227,"Record function id": 0, "Ev Idx": 8210 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.21)", "pid": 2338706, "tid": 2379422, + "ts": 6345941019478.891, "dur": 66249.307, + "args": { + "External id": 990228,"Record function id": 0, "Ev Idx": 8211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941019576.526, "dur": 7.372, + "args": { + "External id": 990229,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941019594.098, "dur": 6.794, + "args": { + "External id": 990230,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941019619.413, "dur": 65134.099, + "args": { + "External id": 990231,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941019643.933, "dur": 65095.877, + "args": { + "External id": 990232,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941019760.792, "dur": 19.049, + "args": { + "External id": 990233,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941019799.602, "dur": 64891.722, + "args": { + "External id": 990234,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941019802.906, "dur": 64887.342, + "args": { + "External id": 990235,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941019810.061, "dur": 8.825, + "args": { + "External id": 990236,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941019821.088, "dur": 64863.802, + "args": { + "External id": 990237,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941084869.078, "dur": 12.746, + "args": { + "External id": 990238,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941084872.881, "dur": 8.555, + "args": { + "External id": 990239,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941084915.429, "dur": 414.823, + "args": { + "External id": 990240,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941084950.879, "dur": 373.621, + "args": { + "External id": 990241,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8224, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941084966.471, "dur": 351.665, + "args": { + "External id": 990242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941085355.957, "dur": 2.801, + "args": { + "External id": 990243,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8226, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941085424.675, "dur": 7.280, + "args": { + "External id": 990244,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941085445.366, "dur": 35.622, + "args": { + "External id": 990245,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941085491.342, "dur": 1.542, + "args": { + "External id": 990246,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941085499.466, "dur": 12.829, + "args": { + "External id": 990247,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941085518.998, "dur": 1.033, + "args": { + "External id": 990248,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941085526.970, "dur": 10.877, + "args": { + "External id": 990249,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941085542.714, "dur": 1.009, + "args": { + "External id": 990250,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941085547.866, "dur": 11.499, + "args": { + "External id": 990251,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941085564.583, "dur": 2.854, + "args": { + "External id": 990252,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941085571.804, "dur": 9.731, + "args": { + "External id": 990253,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941085585.697, "dur": 1.267, + "args": { + "External id": 990254,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941085591.171, "dur": 10.256, + "args": { + "External id": 990255,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941085608.573, "dur": 0.797, + "args": { + "External id": 990256,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941085613.851, "dur": 10.209, + "args": { + "External id": 990257,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941085628.993, "dur": 0.816, + "args": { + "External id": 990258,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941085633.571, "dur": 10.344, + "args": { + "External id": 990259,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941085647.891, "dur": 1.009, + "args": { + "External id": 990260,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941085655.460, "dur": 10.204, + "args": { + "External id": 990261,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941085761.000, "dur": 3014.801, + "args": { + "External id": 990262,"Record function id": 0, "Ev Idx": 8245 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.20)", "pid": 2338706, "tid": 2379422, + "ts": 6345941085782.452, "dur": 1124.475, + "args": { + "External id": 990263,"Record function id": 0, "Ev Idx": 8246 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338706, "tid": 2379422, + "ts": 6345941085798.189, "dur": 380.831, + "args": { + "External id": 990264,"Record function id": 0, "Ev Idx": 8247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941085881.979, "dur": 3.808, + "args": { + "External id": 990265,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941085888.451, "dur": 0.896, + "args": { + "External id": 990266,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941085891.472, "dur": 1.031, + "args": { + "External id": 990267,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941085894.302, "dur": 3.017, + "args": { + "External id": 990268,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941085898.856, "dur": 1.178, + "args": { + "External id": 990269,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941085901.803, "dur": 1.034, + "args": { + "External id": 990270,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941085904.282, "dur": 0.753, + "args": { + "External id": 990271,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941085908.371, "dur": 1.073, + "args": { + "External id": 990272,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941085911.002, "dur": 0.897, + "args": { + "External id": 990273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941085913.618, "dur": 0.854, + "args": { + "External id": 990274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941085932.411, "dur": 210.669, + "args": { + "External id": 990275,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941085951.044, "dur": 185.864, + "args": { + "External id": 990276,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941085968.924, "dur": 14.288, + "args": { + "External id": 990277,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941085989.227, "dur": 121.048, + "args": { + "External id": 990278,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941085992.070, "dur": 117.804, + "args": { + "External id": 990279,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941085995.547, "dur": 7.688, + "args": { + "External id": 990280,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941086004.967, "dur": 103.551, + "args": { + "External id": 990281,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8264 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 2338706, "tid": 2379422, + "ts": 6345941086275.331, "dur": 623.496, + "args": { + "External id": 990282,"Record function id": 0, "Ev Idx": 8265 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338706, "tid": 2379422, + "ts": 6345941086295.231, "dur": 590.526, + "args": { + "External id": 990283,"Record function id": 0, "Ev Idx": 8266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941086360.495, "dur": 6.597, + "args": { + "External id": 990284,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941086385.009, "dur": 28.992, + "args": { + "External id": 990285,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086390.211, "dur": 2.095, + "args": { + "External id": 990286,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086394.054, "dur": 1.654, + "args": { + "External id": 990287,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086397.040, "dur": 0.496, + "args": { + "External id": 990288,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086398.368, "dur": 0.544, + "args": { + "External id": 990289,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086401.111, "dur": 0.420, + "args": { + "External id": 990290,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086402.309, "dur": 0.784, + "args": { + "External id": 990291,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086404.187, "dur": 2.848, + "args": { + "External id": 990292,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086408.025, "dur": 0.636, + "args": { + "External id": 990293,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086409.674, "dur": 0.562, + "args": { + "External id": 990294,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941086426.222, "dur": 44.493, + "args": { + "External id": 990295,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345941086503.016, "dur": 135.154, + "args": { + "External id": 990296,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941086513.301, "dur": 3.378, + "args": { + "External id": 990297,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345941086521.887, "dur": 10.337, + "args": { + "External id": 990298,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345941086526.467, "dur": 5.332, + "args": { + "External id": 990299,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086529.665, "dur": 0.707, + "args": { + "External id": 990300,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941086560.974, "dur": 23.086, + "args": { + "External id": 990301,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086563.096, "dur": 0.520, + "args": { + "External id": 990302,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086566.111, "dur": 0.353, + "args": { + "External id": 990303,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086567.756, "dur": 0.531, + "args": { + "External id": 990304,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086569.442, "dur": 3.321, + "args": { + "External id": 990305,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086573.549, "dur": 0.371, + "args": { + "External id": 990306,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086574.814, "dur": 0.618, + "args": { + "External id": 990307,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086577.963, "dur": 0.454, + "args": { + "External id": 990308,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086579.347, "dur": 0.440, + "args": { + "External id": 990309,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941086580.425, "dur": 0.701, + "args": { + "External id": 990310,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941086595.343, "dur": 31.313, + "args": { + "External id": 990311,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941086688.947, "dur": 123.764, + "args": { + "External id": 990312,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941086721.715, "dur": 87.383, + "args": { + "External id": 990313,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8296, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941086730.957, "dur": 73.516, + "args": { + "External id": 990314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941086830.505, "dur": 1.938, + "args": { + "External id": 990315,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8298, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941086915.187, "dur": 1834.081, + "args": { + "External id": 990316,"Sequence number": 10552513, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8299 + } + }, + { + "ph": "f", "id": 406, "pid": 2338706, "tid": 2379422, "ts": 6345941086915.187, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941087051.158, "dur": 146.700, + "args": { + "External id": 990317,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941087250.755, "dur": 44.537, + "args": { + "External id": 990318,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345941087314.049, "dur": 48.290, + "args": { + "External id": 990319,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941087373.946, "dur": 31.316, + "args": { + "External id": 990320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941087413.592, "dur": 32.281, + "args": { + "External id": 990321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941087452.620, "dur": 27.076, + "args": { + "External id": 990322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941087488.358, "dur": 28.771, + "args": { + "External id": 990323,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941087543.023, "dur": 23.140, + "args": { + "External id": 990324,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941087586.568, "dur": 27.815, + "args": { + "External id": 990325,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941087637.999, "dur": 17.976, + "args": { + "External id": 990326,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941087671.235, "dur": 14.278, + "args": { + "External id": 990327,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941087693.158, "dur": 36.022, + "args": { + "External id": 990328,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941087732.583, "dur": 32.161, + "args": { + "External id": 990329,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345941087796.853, "dur": 316.741, + "args": { + "External id": 990330,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941087876.887, "dur": 6.260, + "args": { + "External id": 990331,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941087885.043, "dur": 2.355, + "args": { + "External id": 990332,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941087888.480, "dur": 1.881, + "args": { + "External id": 990333,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941087891.504, "dur": 2.016, + "args": { + "External id": 990334,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941087940.521, "dur": 7.775, + "args": { + "External id": 990335,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941087945.283, "dur": 2.811, + "args": { + "External id": 990336,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941087950.250, "dur": 33.630, + "args": { + "External id": 990337,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941087957.074, "dur": 1.729, + "args": { + "External id": 990338,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941087985.323, "dur": 1.851, + "args": { + "External id": 990339,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941087986.476, "dur": 0.603, + "args": { + "External id": 990340,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941087988.507, "dur": 38.098, + "args": { + "External id": 990341,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941087990.253, "dur": 3.005, + "args": { + "External id": 990342,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941088159.227, "dur": 32.549, + "args": { + "External id": 990343,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941088215.783, "dur": 18.376, + "args": { + "External id": 990344,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941088243.125, "dur": 54.947, + "args": { + "External id": 990345,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941088305.263, "dur": 38.444, + "args": { + "External id": 990346,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941088357.973, "dur": 33.950, + "args": { + "External id": 990347,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941088406.158, "dur": 39.532, + "args": { + "External id": 990348,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941088454.006, "dur": 32.701, + "args": { + "External id": 990349,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941088495.108, "dur": 30.949, + "args": { + "External id": 990350,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345941088551.596, "dur": 24.934, + "args": { + "External id": 990351,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941088595.609, "dur": 23.923, + "args": { + "External id": 990352,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941088638.122, "dur": 16.918, + "args": { + "External id": 990353,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941088674.530, "dur": 12.652, + "args": { + "External id": 990354,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345941088702.128, "dur": 16.027, + "args": { + "External id": 990355,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088798.719, "dur": 15.338, + "args": { + "External id": 990356,"Record function id": 0, "Ev Idx": 8339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088802.328, "dur": 10.788, + "args": { + "External id": 990357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088806.766, "dur": 5.557, + "args": { + "External id": 990358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088808.291, "dur": 3.945, + "args": { + "External id": 990359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088817.930, "dur": 5.374, + "args": { + "External id": 990360,"Record function id": 0, "Ev Idx": 8343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088819.739, "dur": 3.110, + "args": { + "External id": 990361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088820.821, "dur": 1.466, + "args": { + "External id": 990362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088821.323, "dur": 0.874, + "args": { + "External id": 990363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088826.610, "dur": 7.138, + "args": { + "External id": 990364,"Record function id": 0, "Ev Idx": 8347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088828.194, "dur": 5.082, + "args": { + "External id": 990365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088828.994, "dur": 3.719, + "args": { + "External id": 990366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088829.796, "dur": 2.829, + "args": { + "External id": 990367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088837.054, "dur": 4.557, + "args": { + "External id": 990368,"Record function id": 0, "Ev Idx": 8351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088838.290, "dur": 2.873, + "args": { + "External id": 990369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088839.032, "dur": 1.584, + "args": { + "External id": 990370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088839.708, "dur": 0.844, + "args": { + "External id": 990371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088844.804, "dur": 4.337, + "args": { + "External id": 990372,"Record function id": 0, "Ev Idx": 8355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088845.897, "dur": 2.831, + "args": { + "External id": 990373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088846.983, "dur": 1.202, + "args": { + "External id": 990374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088847.454, "dur": 0.664, + "args": { + "External id": 990375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088852.328, "dur": 4.373, + "args": { + "External id": 990376,"Record function id": 0, "Ev Idx": 8359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088853.517, "dur": 2.753, + "args": { + "External id": 990377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088854.092, "dur": 1.617, + "args": { + "External id": 990378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088854.788, "dur": 0.854, + "args": { + "External id": 990379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088859.948, "dur": 6.198, + "args": { + "External id": 990380,"Record function id": 0, "Ev Idx": 8363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088861.081, "dur": 4.661, + "args": { + "External id": 990381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088861.953, "dur": 3.347, + "args": { + "External id": 990382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088864.574, "dur": 0.628, + "args": { + "External id": 990383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088869.395, "dur": 3.947, + "args": { + "External id": 990384,"Record function id": 0, "Ev Idx": 8367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088870.410, "dur": 2.509, + "args": { + "External id": 990385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088871.283, "dur": 1.038, + "args": { + "External id": 990386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088871.585, "dur": 0.655, + "args": { + "External id": 990387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088876.610, "dur": 4.131, + "args": { + "External id": 990388,"Record function id": 0, "Ev Idx": 8371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941088877.692, "dur": 2.609, + "args": { + "External id": 990389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088878.758, "dur": 1.044, + "args": { + "External id": 990390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941088879.076, "dur": 0.660, + "args": { + "External id": 990391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941088885.700, "dur": 66031.587, + "args": { + "External id": 990392,"Record function id": 0, "Sequence number": 10552512, "Fwd thread id": 1, "Ev Idx": 8375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941088887.239, "dur": 66021.118, + "args": { + "External id": 990393,"Sequence number": 10552512, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8376 + } + }, + { + "ph": "f", "id": 407, "pid": 2338706, "tid": 2379422, "ts": 6345941088887.239, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.20)", "pid": 2338706, "tid": 2379422, + "ts": 6345941088917.388, "dur": 39.294, + "args": { + "External id": 990394,"Record function id": 0, "Ev Idx": 8377 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.20)", "pid": 2338706, "tid": 2379422, + "ts": 6345941088965.064, "dur": 123.689, + "args": { + "External id": 990395,"Record function id": 0, "Ev Idx": 8378 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.20)", "pid": 2338706, "tid": 2379422, + "ts": 6345941089098.221, "dur": 65802.677, + "args": { + "External id": 990396,"Record function id": 0, "Ev Idx": 8379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941089191.993, "dur": 7.635, + "args": { + "External id": 990397,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941089210.792, "dur": 7.021, + "args": { + "External id": 990398,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941089246.216, "dur": 64694.187, + "args": { + "External id": 990399,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941089263.419, "dur": 64663.171, + "args": { + "External id": 990400,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941089360.211, "dur": 18.707, + "args": { + "External id": 990401,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941089398.412, "dur": 64481.222, + "args": { + "External id": 990402,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941089401.332, "dur": 64477.168, + "args": { + "External id": 990403,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941089405.873, "dur": 11.059, + "args": { + "External id": 990404,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941089418.997, "dur": 64454.157, + "args": { + "External id": 990405,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941154099.150, "dur": 14.447, + "args": { + "External id": 990406,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941154103.151, "dur": 9.922, + "args": { + "External id": 990407,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941154146.516, "dur": 371.238, + "args": { + "External id": 990408,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941154179.453, "dur": 333.225, + "args": { + "External id": 990409,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8392, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941154192.670, "dur": 314.548, + "args": { + "External id": 990410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941154540.670, "dur": 2.286, + "args": { + "External id": 990411,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8394, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941154604.455, "dur": 7.254, + "args": { + "External id": 990412,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941154624.740, "dur": 34.990, + "args": { + "External id": 990413,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941154670.219, "dur": 1.785, + "args": { + "External id": 990414,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941154678.116, "dur": 11.849, + "args": { + "External id": 990415,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941154695.782, "dur": 1.045, + "args": { + "External id": 990416,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941154701.558, "dur": 10.407, + "args": { + "External id": 990417,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941154719.019, "dur": 0.982, + "args": { + "External id": 990418,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941154724.324, "dur": 10.361, + "args": { + "External id": 990419,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941154739.684, "dur": 2.970, + "args": { + "External id": 990420,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941154746.844, "dur": 10.367, + "args": { + "External id": 990421,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941154761.540, "dur": 1.705, + "args": { + "External id": 990422,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941154766.961, "dur": 10.766, + "args": { + "External id": 990423,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941154782.464, "dur": 0.930, + "args": { + "External id": 990424,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941154789.896, "dur": 10.118, + "args": { + "External id": 990425,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941154804.864, "dur": 1.054, + "args": { + "External id": 990426,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941154809.871, "dur": 11.034, + "args": { + "External id": 990427,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941154825.238, "dur": 0.825, + "args": { + "External id": 990428,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941154830.275, "dur": 10.256, + "args": { + "External id": 990429,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941154932.719, "dur": 2969.177, + "args": { + "External id": 990430,"Record function id": 0, "Ev Idx": 8413 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 2338706, "tid": 2379422, + "ts": 6345941154954.501, "dur": 1138.869, + "args": { + "External id": 990431,"Record function id": 0, "Ev Idx": 8414 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338706, "tid": 2379422, + "ts": 6345941154970.087, "dur": 390.447, + "args": { + "External id": 990432,"Record function id": 0, "Ev Idx": 8415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941155110.759, "dur": 5.184, + "args": { + "External id": 990433,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941155119.525, "dur": 1.243, + "args": { + "External id": 990434,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941155122.979, "dur": 1.214, + "args": { + "External id": 990435,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941155125.765, "dur": 3.504, + "args": { + "External id": 990436,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941155133.062, "dur": 1.070, + "args": { + "External id": 990437,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941155135.587, "dur": 1.280, + "args": { + "External id": 990438,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941155138.672, "dur": 0.807, + "args": { + "External id": 990439,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941155141.152, "dur": 1.180, + "args": { + "External id": 990440,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941155146.420, "dur": 0.846, + "args": { + "External id": 990441,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941155148.711, "dur": 1.025, + "args": { + "External id": 990442,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941155168.879, "dur": 158.764, + "args": { + "External id": 990443,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941155186.758, "dur": 135.783, + "args": { + "External id": 990444,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941155205.800, "dur": 15.568, + "args": { + "External id": 990445,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941155225.000, "dur": 70.733, + "args": { + "External id": 990446,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941155227.792, "dur": 67.594, + "args": { + "External id": 990447,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155231.484, "dur": 8.015, + "args": { + "External id": 990448,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941155243.260, "dur": 51.319, + "args": { + "External id": 990449,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8432 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 2338706, "tid": 2379422, + "ts": 6345941155455.025, "dur": 594.099, + "args": { + "External id": 990450,"Record function id": 0, "Ev Idx": 8433 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338706, "tid": 2379422, + "ts": 6345941155472.836, "dur": 560.664, + "args": { + "External id": 990451,"Record function id": 0, "Ev Idx": 8434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941155535.179, "dur": 4.900, + "args": { + "External id": 990452,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941155557.248, "dur": 27.799, + "args": { + "External id": 990453,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155562.215, "dur": 1.752, + "args": { + "External id": 990454,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155565.402, "dur": 1.334, + "args": { + "External id": 990455,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155567.559, "dur": 0.636, + "args": { + "External id": 990456,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155569.627, "dur": 0.412, + "args": { + "External id": 990457,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155572.639, "dur": 0.394, + "args": { + "External id": 990458,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155574.280, "dur": 0.552, + "args": { + "External id": 990459,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155576.101, "dur": 2.150, + "args": { + "External id": 990460,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155580.170, "dur": 0.484, + "args": { + "External id": 990461,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155581.585, "dur": 0.267, + "args": { + "External id": 990462,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941155595.692, "dur": 42.381, + "args": { + "External id": 990463,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345941155667.726, "dur": 108.907, + "args": { + "External id": 990464,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941155678.008, "dur": 3.675, + "args": { + "External id": 990465,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345941155686.938, "dur": 10.505, + "args": { + "External id": 990466,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345941155691.331, "dur": 5.656, + "args": { + "External id": 990467,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155694.934, "dur": 0.789, + "args": { + "External id": 990468,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941155703.986, "dur": 23.691, + "args": { + "External id": 990469,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155706.010, "dur": 0.576, + "args": { + "External id": 990470,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155709.410, "dur": 0.475, + "args": { + "External id": 990471,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155711.176, "dur": 0.422, + "args": { + "External id": 990472,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155712.901, "dur": 2.384, + "args": { + "External id": 990473,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155716.541, "dur": 0.383, + "args": { + "External id": 990474,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155718.163, "dur": 0.258, + "args": { + "External id": 990475,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155721.273, "dur": 0.478, + "args": { + "External id": 990476,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155723.127, "dur": 0.259, + "args": { + "External id": 990477,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941155724.614, "dur": 0.251, + "args": { + "External id": 990478,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941155737.356, "dur": 32.060, + "args": { + "External id": 990479,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941155822.001, "dur": 119.627, + "args": { + "External id": 990480,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941155851.410, "dur": 86.887, + "args": { + "External id": 990481,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8464, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941155860.812, "dur": 73.276, + "args": { + "External id": 990482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941155961.501, "dur": 1.980, + "args": { + "External id": 990483,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8466, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941156102.691, "dur": 1774.637, + "args": { + "External id": 990484,"Sequence number": 10552511, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8467 + } + }, + { + "ph": "f", "id": 408, "pid": 2338706, "tid": 2379422, "ts": 6345941156102.691, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941156222.100, "dur": 112.228, + "args": { + "External id": 990485,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941156384.190, "dur": 44.521, + "args": { + "External id": 990486,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345941156447.609, "dur": 49.041, + "args": { + "External id": 990487,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941156507.059, "dur": 31.553, + "args": { + "External id": 990488,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941156548.100, "dur": 33.935, + "args": { + "External id": 990489,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941156588.910, "dur": 27.965, + "args": { + "External id": 990490,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941156624.741, "dur": 29.136, + "args": { + "External id": 990491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941156679.781, "dur": 24.760, + "args": { + "External id": 990492,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941156725.728, "dur": 30.110, + "args": { + "External id": 990493,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941156780.510, "dur": 19.954, + "args": { + "External id": 990494,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941156815.446, "dur": 16.701, + "args": { + "External id": 990495,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941156839.882, "dur": 37.438, + "args": { + "External id": 990496,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941156880.687, "dur": 33.643, + "args": { + "External id": 990497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345941156944.598, "dur": 313.422, + "args": { + "External id": 990498,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941157041.084, "dur": 7.171, + "args": { + "External id": 990499,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941157050.444, "dur": 37.586, + "args": { + "External id": 990500,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941157090.511, "dur": 3.367, + "args": { + "External id": 990501,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941157094.801, "dur": 3.012, + "args": { + "External id": 990502,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941157143.984, "dur": 5.140, + "args": { + "External id": 990503,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941157145.918, "dur": 3.020, + "args": { + "External id": 990504,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941157151.512, "dur": 33.734, + "args": { + "External id": 990505,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941157158.631, "dur": 1.674, + "args": { + "External id": 990506,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941157189.245, "dur": 1.619, + "args": { + "External id": 990507,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941157190.230, "dur": 0.529, + "args": { + "External id": 990508,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941157192.844, "dur": 17.701, + "args": { + "External id": 990509,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941157194.605, "dur": 2.803, + "args": { + "External id": 990510,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941157302.745, "dur": 29.066, + "args": { + "External id": 990511,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941157350.099, "dur": 17.353, + "args": { + "External id": 990512,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941157375.328, "dur": 49.205, + "args": { + "External id": 990513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941157431.052, "dur": 41.986, + "args": { + "External id": 990514,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941157481.497, "dur": 19.869, + "args": { + "External id": 990515,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941157528.623, "dur": 40.903, + "args": { + "External id": 990516,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941157580.069, "dur": 28.657, + "args": { + "External id": 990517,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941157615.928, "dur": 33.665, + "args": { + "External id": 990518,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345941157678.475, "dur": 22.904, + "args": { + "External id": 990519,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941157720.818, "dur": 26.830, + "args": { + "External id": 990520,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941157766.741, "dur": 16.386, + "args": { + "External id": 990521,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941157797.252, "dur": 13.779, + "args": { + "External id": 990522,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345941157829.240, "dur": 15.330, + "args": { + "External id": 990523,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941157925.333, "dur": 17.080, + "args": { + "External id": 990524,"Record function id": 0, "Ev Idx": 8507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941157928.617, "dur": 12.925, + "args": { + "External id": 990525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941157933.373, "dur": 7.220, + "args": { + "External id": 990526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941157936.843, "dur": 3.644, + "args": { + "External id": 990527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941157946.493, "dur": 4.588, + "args": { + "External id": 990528,"Record function id": 0, "Ev Idx": 8511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941157947.660, "dur": 2.959, + "args": { + "External id": 990529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941157948.522, "dur": 1.565, + "args": { + "External id": 990530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941157949.179, "dur": 0.815, + "args": { + "External id": 990531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941157954.334, "dur": 4.388, + "args": { + "External id": 990532,"Record function id": 0, "Ev Idx": 8515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941157955.547, "dur": 2.709, + "args": { + "External id": 990533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941157956.227, "dur": 1.598, + "args": { + "External id": 990534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941157956.819, "dur": 0.919, + "args": { + "External id": 990535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941157961.907, "dur": 4.113, + "args": { + "External id": 990536,"Record function id": 0, "Ev Idx": 8519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941157963.255, "dur": 2.347, + "args": { + "External id": 990537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941157964.015, "dur": 1.173, + "args": { + "External id": 990538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941157964.335, "dur": 0.787, + "args": { + "External id": 990539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941157969.155, "dur": 4.035, + "args": { + "External id": 990540,"Record function id": 0, "Ev Idx": 8523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941157970.314, "dur": 2.423, + "args": { + "External id": 990541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941157971.254, "dur": 1.046, + "args": { + "External id": 990542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941157971.522, "dur": 0.711, + "args": { + "External id": 990543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941157976.271, "dur": 6.064, + "args": { + "External id": 990544,"Record function id": 0, "Ev Idx": 8527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941157977.729, "dur": 4.189, + "args": { + "External id": 990545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941157978.220, "dur": 3.292, + "args": { + "External id": 990546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941157978.704, "dur": 2.737, + "args": { + "External id": 990547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941157985.532, "dur": 3.985, + "args": { + "External id": 990548,"Record function id": 0, "Ev Idx": 8531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941157986.679, "dur": 2.414, + "args": { + "External id": 990549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941157987.240, "dur": 1.360, + "args": { + "External id": 990550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941157987.686, "dur": 0.842, + "args": { + "External id": 990551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941157992.657, "dur": 42.829, + "args": { + "External id": 990552,"Record function id": 0, "Ev Idx": 8535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941158029.084, "dur": 5.376, + "args": { + "External id": 990553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941158031.171, "dur": 2.298, + "args": { + "External id": 990554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941158031.744, "dur": 1.478, + "args": { + "External id": 990555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941158040.799, "dur": 6.721, + "args": { + "External id": 990556,"Record function id": 0, "Ev Idx": 8539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941158042.084, "dur": 4.961, + "args": { + "External id": 990557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941158042.648, "dur": 3.785, + "args": { + "External id": 990558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941158045.518, "dur": 0.850, + "args": { + "External id": 990559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941158082.594, "dur": 63203.947, + "args": { + "External id": 990560,"Record function id": 0, "Sequence number": 10552510, "Fwd thread id": 1, "Ev Idx": 8543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941158121.468, "dur": 63154.954, + "args": { + "External id": 990561,"Sequence number": 10552510, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8544 + } + }, + { + "ph": "f", "id": 409, "pid": 2338706, "tid": 2379422, "ts": 6345941158121.468, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 2338706, "tid": 2379422, + "ts": 6345941158157.534, "dur": 42.445, + "args": { + "External id": 990562,"Record function id": 0, "Ev Idx": 8545 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 2338706, "tid": 2379422, + "ts": 6345941158208.822, "dur": 74.753, + "args": { + "External id": 990563,"Record function id": 0, "Ev Idx": 8546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 2338706, "tid": 2379422, + "ts": 6345941158289.440, "dur": 62978.489, + "args": { + "External id": 990564,"Record function id": 0, "Ev Idx": 8547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941158386.739, "dur": 9.037, + "args": { + "External id": 990565,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941158407.053, "dur": 5.435, + "args": { + "External id": 990566,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941158426.883, "dur": 61821.664, + "args": { + "External id": 990567,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941158443.991, "dur": 61790.849, + "args": { + "External id": 990568,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941158541.589, "dur": 19.228, + "args": { + "External id": 990569,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941158580.550, "dur": 61610.626, + "args": { + "External id": 990570,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941158586.700, "dur": 61603.271, + "args": { + "External id": 990571,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941158591.363, "dur": 11.306, + "args": { + "External id": 990572,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941158604.846, "dur": 61580.164, + "args": { + "External id": 990573,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941220364.182, "dur": 12.963, + "args": { + "External id": 990574,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941220368.018, "dur": 8.678, + "args": { + "External id": 990575,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941220410.554, "dur": 407.194, + "args": { + "External id": 990576,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941220444.585, "dur": 368.077, + "args": { + "External id": 990577,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8560, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941220457.011, "dur": 350.735, + "args": { + "External id": 990578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941220841.173, "dur": 2.542, + "args": { + "External id": 990579,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8562, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941220905.396, "dur": 6.951, + "args": { + "External id": 990580,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941220925.073, "dur": 34.093, + "args": { + "External id": 990581,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941220969.978, "dur": 3.253, + "args": { + "External id": 990582,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941220979.487, "dur": 12.339, + "args": { + "External id": 990583,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941220997.696, "dur": 0.881, + "args": { + "External id": 990584,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941221002.921, "dur": 30.574, + "args": { + "External id": 990585,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941221041.720, "dur": 1.253, + "args": { + "External id": 990586,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941221047.702, "dur": 44.405, + "args": { + "External id": 990587,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941221103.168, "dur": 1.491, + "args": { + "External id": 990588,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941221110.215, "dur": 11.472, + "args": { + "External id": 990589,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941221126.228, "dur": 1.602, + "args": { + "External id": 990590,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941221132.227, "dur": 11.162, + "args": { + "External id": 990591,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941221148.139, "dur": 1.044, + "args": { + "External id": 990592,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941221153.354, "dur": 10.090, + "args": { + "External id": 990593,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941221170.019, "dur": 0.901, + "args": { + "External id": 990594,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941221175.254, "dur": 10.604, + "args": { + "External id": 990595,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941221190.265, "dur": 0.851, + "args": { + "External id": 990596,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941221195.069, "dur": 9.482, + "args": { + "External id": 990597,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941221304.021, "dur": 2980.338, + "args": { + "External id": 990598,"Record function id": 0, "Ev Idx": 8581 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 2338706, "tid": 2379422, + "ts": 6345941221328.057, "dur": 1095.334, + "args": { + "External id": 990599,"Record function id": 0, "Ev Idx": 8582 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338706, "tid": 2379422, + "ts": 6345941221344.876, "dur": 333.601, + "args": { + "External id": 990600,"Record function id": 0, "Ev Idx": 8583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941221437.612, "dur": 6.514, + "args": { + "External id": 990601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941221447.549, "dur": 0.969, + "args": { + "External id": 990602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941221450.561, "dur": 1.199, + "args": { + "External id": 990603,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941221453.598, "dur": 0.849, + "args": { + "External id": 990604,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941221456.007, "dur": 0.926, + "args": { + "External id": 990605,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941221460.661, "dur": 1.063, + "args": { + "External id": 990606,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941221463.308, "dur": 0.854, + "args": { + "External id": 990607,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941221465.516, "dur": 1.097, + "args": { + "External id": 990608,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941221468.534, "dur": 3.202, + "args": { + "External id": 990609,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941221475.197, "dur": 1.261, + "args": { + "External id": 990610,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941221494.188, "dur": 154.327, + "args": { + "External id": 990611,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941221512.756, "dur": 130.662, + "args": { + "External id": 990612,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941221531.091, "dur": 16.765, + "args": { + "External id": 990613,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941221551.481, "dur": 63.342, + "args": { + "External id": 990614,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941221554.382, "dur": 60.094, + "args": { + "External id": 990615,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941221557.981, "dur": 5.784, + "args": { + "External id": 990616,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941221565.481, "dur": 48.277, + "args": { + "External id": 990617,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8600 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 2338706, "tid": 2379422, + "ts": 6345941221764.932, "dur": 650.888, + "args": { + "External id": 990618,"Record function id": 0, "Ev Idx": 8601 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338706, "tid": 2379422, + "ts": 6345941221782.122, "dur": 620.175, + "args": { + "External id": 990619,"Record function id": 0, "Ev Idx": 8602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941221842.384, "dur": 5.279, + "args": { + "External id": 990620,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941221864.916, "dur": 29.353, + "args": { + "External id": 990621,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941221869.601, "dur": 1.437, + "args": { + "External id": 990622,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941221872.915, "dur": 1.625, + "args": { + "External id": 990623,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941221875.882, "dur": 0.507, + "args": { + "External id": 990624,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941221877.696, "dur": 2.815, + "args": { + "External id": 990625,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941221882.635, "dur": 0.394, + "args": { + "External id": 990626,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941221884.456, "dur": 0.578, + "args": { + "External id": 990627,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941221886.383, "dur": 0.680, + "args": { + "External id": 990628,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941221889.056, "dur": 0.444, + "args": { + "External id": 990629,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941221890.823, "dur": 0.655, + "args": { + "External id": 990630,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941221905.227, "dur": 38.565, + "args": { + "External id": 990631,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345941221973.616, "dur": 178.715, + "args": { + "External id": 990632,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941221983.351, "dur": 3.276, + "args": { + "External id": 990633,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345941221991.861, "dur": 12.244, + "args": { + "External id": 990634,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345941221998.125, "dur": 5.525, + "args": { + "External id": 990635,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941222001.773, "dur": 0.622, + "args": { + "External id": 990636,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941222032.384, "dur": 65.478, + "args": { + "External id": 990637,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941222034.527, "dur": 3.160, + "args": { + "External id": 990638,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941222040.494, "dur": 0.433, + "args": { + "External id": 990639,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941222042.185, "dur": 0.451, + "args": { + "External id": 990640,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941222043.731, "dur": 1.620, + "args": { + "External id": 990641,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941222046.817, "dur": 0.655, + "args": { + "External id": 990642,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941222048.893, "dur": 0.701, + "args": { + "External id": 990643,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941222052.092, "dur": 34.511, + "args": { + "External id": 990644,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941222089.390, "dur": 0.457, + "args": { + "External id": 990645,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941222091.404, "dur": 3.026, + "args": { + "External id": 990646,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941222110.654, "dur": 33.463, + "args": { + "External id": 990647,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941222201.834, "dur": 124.290, + "args": { + "External id": 990648,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941222233.117, "dur": 89.275, + "args": { + "External id": 990649,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8632, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941222242.650, "dur": 75.469, + "args": { + "External id": 990650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941222344.325, "dur": 1.972, + "args": { + "External id": 990651,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8634, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941222430.530, "dur": 1829.425, + "args": { + "External id": 990652,"Sequence number": 10552509, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8635 + } + }, + { + "ph": "f", "id": 410, "pid": 2338706, "tid": 2379422, "ts": 6345941222430.530, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941222546.825, "dur": 106.271, + "args": { + "External id": 990653,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941222697.847, "dur": 41.645, + "args": { + "External id": 990654,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345941222760.367, "dur": 50.374, + "args": { + "External id": 990655,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941222820.263, "dur": 31.541, + "args": { + "External id": 990656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941222857.830, "dur": 33.277, + "args": { + "External id": 990657,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941222897.737, "dur": 27.223, + "args": { + "External id": 990658,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941222935.201, "dur": 28.880, + "args": { + "External id": 990659,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941222989.981, "dur": 44.478, + "args": { + "External id": 990660,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941223095.039, "dur": 32.502, + "args": { + "External id": 990661,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941223155.046, "dur": 19.184, + "args": { + "External id": 990662,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941223192.546, "dur": 14.601, + "args": { + "External id": 990663,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941223218.197, "dur": 44.033, + "args": { + "External id": 990664,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941223266.701, "dur": 32.339, + "args": { + "External id": 990665,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345941223331.080, "dur": 254.742, + "args": { + "External id": 990666,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941223412.770, "dur": 7.707, + "args": { + "External id": 990667,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941223423.058, "dur": 3.582, + "args": { + "External id": 990668,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941223427.628, "dur": 2.856, + "args": { + "External id": 990669,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941223431.232, "dur": 5.827, + "args": { + "External id": 990670,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941223480.423, "dur": 4.764, + "args": { + "External id": 990671,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941223482.226, "dur": 2.804, + "args": { + "External id": 990672,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941223487.146, "dur": 35.068, + "args": { + "External id": 990673,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941223494.390, "dur": 1.696, + "args": { + "External id": 990674,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941223524.343, "dur": 1.360, + "args": { + "External id": 990675,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941223525.032, "dur": 0.579, + "args": { + "External id": 990676,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941223526.414, "dur": 15.640, + "args": { + "External id": 990677,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941223528.465, "dur": 0.792, + "args": { + "External id": 990678,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941223628.377, "dur": 26.921, + "args": { + "External id": 990679,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941223675.186, "dur": 17.242, + "args": { + "External id": 990680,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941223700.968, "dur": 40.120, + "args": { + "External id": 990681,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941223747.542, "dur": 37.093, + "args": { + "External id": 990682,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941223792.355, "dur": 36.040, + "args": { + "External id": 990683,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941223845.062, "dur": 39.372, + "args": { + "External id": 990684,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941223893.047, "dur": 30.823, + "args": { + "External id": 990685,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941223930.821, "dur": 29.307, + "args": { + "External id": 990686,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345941223988.298, "dur": 47.383, + "args": { + "External id": 990687,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941224088.586, "dur": 30.662, + "args": { + "External id": 990688,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941224142.019, "dur": 18.820, + "args": { + "External id": 990689,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941224179.676, "dur": 12.956, + "args": { + "External id": 990690,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345941224209.533, "dur": 15.877, + "args": { + "External id": 990691,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224307.911, "dur": 17.230, + "args": { + "External id": 990692,"Record function id": 0, "Ev Idx": 8675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224311.571, "dur": 12.594, + "args": { + "External id": 990693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224316.085, "dur": 6.985, + "args": { + "External id": 990694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224318.930, "dur": 4.026, + "args": { + "External id": 990695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224329.288, "dur": 4.944, + "args": { + "External id": 990696,"Record function id": 0, "Ev Idx": 8679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224330.829, "dur": 2.930, + "args": { + "External id": 990697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224331.654, "dur": 1.643, + "args": { + "External id": 990698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224332.463, "dur": 0.735, + "args": { + "External id": 990699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224337.382, "dur": 4.541, + "args": { + "External id": 990700,"Record function id": 0, "Ev Idx": 8683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224338.581, "dur": 2.900, + "args": { + "External id": 990701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224339.442, "dur": 1.574, + "args": { + "External id": 990702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224340.228, "dur": 0.665, + "args": { + "External id": 990703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224345.217, "dur": 4.099, + "args": { + "External id": 990704,"Record function id": 0, "Ev Idx": 8687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224346.430, "dur": 2.478, + "args": { + "External id": 990705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224347.063, "dur": 1.311, + "args": { + "External id": 990706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224347.706, "dur": 0.595, + "args": { + "External id": 990707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224352.482, "dur": 21.270, + "args": { + "External id": 990708,"Record function id": 0, "Ev Idx": 8691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224370.764, "dur": 2.464, + "args": { + "External id": 990709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224371.565, "dur": 1.157, + "args": { + "External id": 990710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224371.906, "dur": 0.744, + "args": { + "External id": 990711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224377.268, "dur": 8.296, + "args": { + "External id": 990712,"Record function id": 0, "Ev Idx": 8695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224378.458, "dur": 6.686, + "args": { + "External id": 990713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224378.930, "dur": 5.791, + "args": { + "External id": 990714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224381.253, "dur": 3.384, + "args": { + "External id": 990715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224388.938, "dur": 4.495, + "args": { + "External id": 990716,"Record function id": 0, "Ev Idx": 8699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224390.292, "dur": 2.744, + "args": { + "External id": 990717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224391.107, "dur": 1.510, + "args": { + "External id": 990718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224391.596, "dur": 0.954, + "args": { + "External id": 990719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224396.547, "dur": 72.977, + "args": { + "External id": 990720,"Record function id": 0, "Ev Idx": 8703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224466.017, "dur": 3.007, + "args": { + "External id": 990721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224467.113, "dur": 1.357, + "args": { + "External id": 990722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224467.648, "dur": 0.705, + "args": { + "External id": 990723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224473.695, "dur": 6.254, + "args": { + "External id": 990724,"Record function id": 0, "Ev Idx": 8707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941224474.947, "dur": 4.555, + "args": { + "External id": 990725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224475.611, "dur": 3.460, + "args": { + "External id": 990726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941224478.295, "dur": 0.687, + "args": { + "External id": 990727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941224484.152, "dur": 62635.649, + "args": { + "External id": 990728,"Record function id": 0, "Sequence number": 10552508, "Fwd thread id": 1, "Ev Idx": 8711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941224485.465, "dur": 62622.775, + "args": { + "External id": 990729,"Sequence number": 10552508, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8712 + } + }, + { + "ph": "f", "id": 411, "pid": 2338706, "tid": 2379422, "ts": 6345941224485.465, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 2338706, "tid": 2379422, + "ts": 6345941224518.216, "dur": 41.579, + "args": { + "External id": 990730,"Record function id": 0, "Ev Idx": 8713 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 2338706, "tid": 2379422, + "ts": 6345941224567.475, "dur": 67.296, + "args": { + "External id": 990731,"Record function id": 0, "Ev Idx": 8714 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 2338706, "tid": 2379422, + "ts": 6345941224641.116, "dur": 62457.800, + "args": { + "External id": 990732,"Record function id": 0, "Ev Idx": 8715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941224734.719, "dur": 7.172, + "args": { + "External id": 990733,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941224753.884, "dur": 5.055, + "args": { + "External id": 990734,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941224774.306, "dur": 61353.128, + "args": { + "External id": 990735,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941224790.142, "dur": 61322.873, + "args": { + "External id": 990736,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941224920.531, "dur": 23.854, + "args": { + "External id": 990737,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941224963.854, "dur": 61076.452, + "args": { + "External id": 990738,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941224967.022, "dur": 61072.189, + "args": { + "External id": 990739,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941224971.727, "dur": 9.702, + "args": { + "External id": 990740,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941224983.891, "dur": 61050.211, + "args": { + "External id": 990741,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941286245.781, "dur": 13.602, + "args": { + "External id": 990742,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941286250.000, "dur": 9.023, + "args": { + "External id": 990743,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941286290.817, "dur": 373.842, + "args": { + "External id": 990744,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941286325.507, "dur": 334.348, + "args": { + "External id": 990745,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8728, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941286338.397, "dur": 315.825, + "args": { + "External id": 990746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941286687.826, "dur": 2.275, + "args": { + "External id": 990747,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8730, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941286748.931, "dur": 6.972, + "args": { + "External id": 990748,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941286768.691, "dur": 35.682, + "args": { + "External id": 990749,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941286815.509, "dur": 4.197, + "args": { + "External id": 990750,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941286825.568, "dur": 12.461, + "args": { + "External id": 990751,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941286844.047, "dur": 1.115, + "args": { + "External id": 990752,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941286849.624, "dur": 10.380, + "args": { + "External id": 990753,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941286866.528, "dur": 1.034, + "args": { + "External id": 990754,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941286872.217, "dur": 10.899, + "args": { + "External id": 990755,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941286888.181, "dur": 0.868, + "args": { + "External id": 990756,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941286893.469, "dur": 10.599, + "args": { + "External id": 990757,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941286908.471, "dur": 1.266, + "args": { + "External id": 990758,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941286913.998, "dur": 10.386, + "args": { + "External id": 990759,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941286931.119, "dur": 0.958, + "args": { + "External id": 990760,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941286936.205, "dur": 10.025, + "args": { + "External id": 990761,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941286950.831, "dur": 1.015, + "args": { + "External id": 990762,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941286956.349, "dur": 10.996, + "args": { + "External id": 990763,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941286972.348, "dur": 1.036, + "args": { + "External id": 990764,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941286977.785, "dur": 10.142, + "args": { + "External id": 990765,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941287137.866, "dur": 2996.876, + "args": { + "External id": 990766,"Record function id": 0, "Ev Idx": 8749 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 2338706, "tid": 2379422, + "ts": 6345941287161.119, "dur": 1112.615, + "args": { + "External id": 990767,"Record function id": 0, "Ev Idx": 8750 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338706, "tid": 2379422, + "ts": 6345941287177.620, "dur": 344.607, + "args": { + "External id": 990768,"Record function id": 0, "Ev Idx": 8751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941287268.951, "dur": 7.145, + "args": { + "External id": 990769,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941287279.881, "dur": 1.094, + "args": { + "External id": 990770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941287283.227, "dur": 1.092, + "args": { + "External id": 990771,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941287286.236, "dur": 1.032, + "args": { + "External id": 990772,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941287298.456, "dur": 1.015, + "args": { + "External id": 990773,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941287301.329, "dur": 1.015, + "args": { + "External id": 990774,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941287304.336, "dur": 1.035, + "args": { + "External id": 990775,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941287306.881, "dur": 1.351, + "args": { + "External id": 990776,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941287312.168, "dur": 2.868, + "args": { + "External id": 990777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941287316.785, "dur": 1.075, + "args": { + "External id": 990778,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941287336.422, "dur": 154.763, + "args": { + "External id": 990779,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941287354.039, "dur": 132.522, + "args": { + "External id": 990780,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941287370.465, "dur": 14.351, + "args": { + "External id": 990781,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941287388.585, "dur": 69.770, + "args": { + "External id": 990782,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941287393.814, "dur": 64.182, + "args": { + "External id": 990783,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287397.513, "dur": 6.351, + "args": { + "External id": 990784,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941287405.601, "dur": 51.810, + "args": { + "External id": 990785,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8768 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 2338706, "tid": 2379422, + "ts": 6345941287611.179, "dur": 653.937, + "args": { + "External id": 990786,"Record function id": 0, "Ev Idx": 8769 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338706, "tid": 2379422, + "ts": 6345941287629.903, "dur": 621.262, + "args": { + "External id": 990787,"Record function id": 0, "Ev Idx": 8770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941287690.929, "dur": 4.988, + "args": { + "External id": 990788,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941287711.848, "dur": 30.272, + "args": { + "External id": 990789,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287718.473, "dur": 1.798, + "args": { + "External id": 990790,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287722.173, "dur": 1.691, + "args": { + "External id": 990791,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287725.164, "dur": 0.608, + "args": { + "External id": 990792,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287726.921, "dur": 2.633, + "args": { + "External id": 990793,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287732.194, "dur": 0.335, + "args": { + "External id": 990794,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287733.591, "dur": 0.286, + "args": { + "External id": 990795,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287735.054, "dur": 0.571, + "args": { + "External id": 990796,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287737.132, "dur": 0.532, + "args": { + "External id": 990797,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287738.540, "dur": 0.590, + "args": { + "External id": 990798,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941287753.268, "dur": 43.039, + "args": { + "External id": 990799,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345941287827.120, "dur": 112.618, + "args": { + "External id": 990800,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941287836.617, "dur": 3.439, + "args": { + "External id": 990801,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345941287845.516, "dur": 10.661, + "args": { + "External id": 990802,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345941287850.126, "dur": 5.649, + "args": { + "External id": 990803,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287853.780, "dur": 0.710, + "args": { + "External id": 990804,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941287862.752, "dur": 27.047, + "args": { + "External id": 990805,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287864.931, "dur": 2.803, + "args": { + "External id": 990806,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287870.366, "dur": 0.665, + "args": { + "External id": 990807,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287872.567, "dur": 0.472, + "args": { + "External id": 990808,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287874.690, "dur": 1.200, + "args": { + "External id": 990809,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287877.148, "dur": 0.546, + "args": { + "External id": 990810,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287878.733, "dur": 0.538, + "args": { + "External id": 990811,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287881.579, "dur": 0.319, + "args": { + "External id": 990812,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287883.200, "dur": 0.389, + "args": { + "External id": 990813,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941287884.582, "dur": 2.262, + "args": { + "External id": 990814,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941287900.016, "dur": 32.369, + "args": { + "External id": 990815,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941287983.878, "dur": 182.999, + "args": { + "External id": 990816,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941288033.681, "dur": 129.047, + "args": { + "External id": 990817,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8800, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941288044.022, "dur": 113.894, + "args": { + "External id": 990818,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941288189.149, "dur": 2.253, + "args": { + "External id": 990819,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8802, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941288281.872, "dur": 1824.974, + "args": { + "External id": 990820,"Sequence number": 10552507, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8803 + } + }, + { + "ph": "f", "id": 412, "pid": 2338706, "tid": 2379422, "ts": 6345941288281.872, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941288396.721, "dur": 106.550, + "args": { + "External id": 990821,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941288548.596, "dur": 41.404, + "args": { + "External id": 990822,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345941288611.180, "dur": 48.911, + "args": { + "External id": 990823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941288669.973, "dur": 31.812, + "args": { + "External id": 990824,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941288708.673, "dur": 32.445, + "args": { + "External id": 990825,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941288747.434, "dur": 26.758, + "args": { + "External id": 990826,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941288785.098, "dur": 29.073, + "args": { + "External id": 990827,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941288840.647, "dur": 26.442, + "args": { + "External id": 990828,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941288886.548, "dur": 30.655, + "args": { + "External id": 990829,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941288941.839, "dur": 20.044, + "args": { + "External id": 990830,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941288978.414, "dur": 15.475, + "args": { + "External id": 990831,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941289004.039, "dur": 96.003, + "args": { + "External id": 990832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941289106.325, "dur": 36.507, + "args": { + "External id": 990833,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345941289176.488, "dur": 258.841, + "args": { + "External id": 990834,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941289260.627, "dur": 6.880, + "args": { + "External id": 990835,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941289269.425, "dur": 3.460, + "args": { + "External id": 990836,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941289274.031, "dur": 2.298, + "args": { + "External id": 990837,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941289277.236, "dur": 4.314, + "args": { + "External id": 990838,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941289326.727, "dur": 5.169, + "args": { + "External id": 990839,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941289328.754, "dur": 2.957, + "args": { + "External id": 990840,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941289334.218, "dur": 34.473, + "args": { + "External id": 990841,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941289341.406, "dur": 1.798, + "args": { + "External id": 990842,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941289372.792, "dur": 2.138, + "args": { + "External id": 990843,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941289373.982, "dur": 0.871, + "args": { + "External id": 990844,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941289376.185, "dur": 16.570, + "args": { + "External id": 990845,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941289378.403, "dur": 0.751, + "args": { + "External id": 990846,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941289477.649, "dur": 28.451, + "args": { + "External id": 990847,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941289523.828, "dur": 19.428, + "args": { + "External id": 990848,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941289551.035, "dur": 39.619, + "args": { + "External id": 990849,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941289597.023, "dur": 36.458, + "args": { + "External id": 990850,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941289641.044, "dur": 20.352, + "args": { + "External id": 990851,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941289669.994, "dur": 30.809, + "args": { + "External id": 990852,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941289708.695, "dur": 44.257, + "args": { + "External id": 990853,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941289768.225, "dur": 38.169, + "args": { + "External id": 990854,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345941289832.129, "dur": 27.950, + "args": { + "External id": 990855,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941289879.634, "dur": 29.266, + "args": { + "External id": 990856,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941289928.109, "dur": 17.725, + "args": { + "External id": 990857,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941289959.811, "dur": 19.114, + "args": { + "External id": 990858,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345941289996.618, "dur": 35.099, + "args": { + "External id": 990859,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290159.147, "dur": 37.376, + "args": { + "External id": 990860,"Record function id": 0, "Ev Idx": 8843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290162.298, "dur": 32.929, + "args": { + "External id": 990861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290184.965, "dur": 7.663, + "args": { + "External id": 990862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290188.029, "dur": 4.465, + "args": { + "External id": 990863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290200.900, "dur": 5.608, + "args": { + "External id": 990864,"Record function id": 0, "Ev Idx": 8847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290202.498, "dur": 3.498, + "args": { + "External id": 990865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290203.557, "dur": 1.888, + "args": { + "External id": 990866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290204.265, "dur": 1.083, + "args": { + "External id": 990867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290209.805, "dur": 7.812, + "args": { + "External id": 990868,"Record function id": 0, "Ev Idx": 8851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290211.390, "dur": 5.749, + "args": { + "External id": 990869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290212.075, "dur": 4.584, + "args": { + "External id": 990870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290215.330, "dur": 1.214, + "args": { + "External id": 990871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290220.952, "dur": 4.103, + "args": { + "External id": 990872,"Record function id": 0, "Ev Idx": 8855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290222.264, "dur": 2.368, + "args": { + "External id": 990873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290223.036, "dur": 1.178, + "args": { + "External id": 990874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290223.454, "dur": 0.677, + "args": { + "External id": 990875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290228.198, "dur": 4.100, + "args": { + "External id": 990876,"Record function id": 0, "Ev Idx": 8859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290229.179, "dur": 2.696, + "args": { + "External id": 990877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290230.078, "dur": 1.377, + "args": { + "External id": 990878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290230.762, "dur": 0.629, + "args": { + "External id": 990879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290235.474, "dur": 6.534, + "args": { + "External id": 990880,"Record function id": 0, "Ev Idx": 8863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290236.632, "dur": 4.944, + "args": { + "External id": 990881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290237.118, "dur": 3.946, + "args": { + "External id": 990882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290237.658, "dur": 3.345, + "args": { + "External id": 990883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290245.308, "dur": 7.353, + "args": { + "External id": 990884,"Record function id": 0, "Ev Idx": 8867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290246.565, "dur": 5.682, + "args": { + "External id": 990885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290247.345, "dur": 4.497, + "args": { + "External id": 990886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290250.701, "dur": 1.069, + "args": { + "External id": 990887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290255.921, "dur": 4.364, + "args": { + "External id": 990888,"Record function id": 0, "Ev Idx": 8871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290257.253, "dur": 2.617, + "args": { + "External id": 990889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290257.771, "dur": 1.660, + "args": { + "External id": 990890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290258.394, "dur": 0.973, + "args": { + "External id": 990891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290263.368, "dur": 4.052, + "args": { + "External id": 990892,"Record function id": 0, "Ev Idx": 8875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941290264.457, "dur": 2.561, + "args": { + "External id": 990893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290264.998, "dur": 1.618, + "args": { + "External id": 990894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941290265.785, "dur": 0.755, + "args": { + "External id": 990895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941290271.074, "dur": 64489.310, + "args": { + "External id": 990896,"Record function id": 0, "Sequence number": 10552506, "Fwd thread id": 1, "Ev Idx": 8879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941290272.423, "dur": 64478.932, + "args": { + "External id": 990897,"Sequence number": 10552506, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8880 + } + }, + { + "ph": "f", "id": 413, "pid": 2338706, "tid": 2379422, "ts": 6345941290272.423, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 2338706, "tid": 2379422, + "ts": 6345941290303.458, "dur": 40.833, + "args": { + "External id": 990898,"Record function id": 0, "Ev Idx": 8881 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 2338706, "tid": 2379422, + "ts": 6345941290352.977, "dur": 73.828, + "args": { + "External id": 990899,"Record function id": 0, "Ev Idx": 8882 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 2338706, "tid": 2379422, + "ts": 6345941290433.192, "dur": 64310.731, + "args": { + "External id": 990900,"Record function id": 0, "Ev Idx": 8883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941290526.381, "dur": 7.705, + "args": { + "External id": 990901,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941290544.406, "dur": 4.680, + "args": { + "External id": 990902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941290566.246, "dur": 63086.396, + "args": { + "External id": 990903,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941290580.899, "dur": 63058.341, + "args": { + "External id": 990904,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941290681.573, "dur": 18.486, + "args": { + "External id": 990905,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941290719.946, "dur": 62869.759, + "args": { + "External id": 990906,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941290722.797, "dur": 62865.661, + "args": { + "External id": 990907,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941290752.793, "dur": 9.981, + "args": { + "External id": 990908,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941290765.344, "dur": 62818.150, + "args": { + "External id": 990909,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941353766.415, "dur": 12.269, + "args": { + "External id": 990910,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941353769.913, "dur": 8.349, + "args": { + "External id": 990911,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941353809.205, "dur": 516.790, + "args": { + "External id": 990912,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941353843.174, "dur": 476.343, + "args": { + "External id": 990913,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8896, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941353855.941, "dur": 456.933, + "args": { + "External id": 990914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941354352.773, "dur": 2.743, + "args": { + "External id": 990915,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8898, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941354425.064, "dur": 6.970, + "args": { + "External id": 990916,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941354446.318, "dur": 38.576, + "args": { + "External id": 990917,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941354495.752, "dur": 3.390, + "args": { + "External id": 990918,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941354505.355, "dur": 14.120, + "args": { + "External id": 990919,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941354525.101, "dur": 1.040, + "args": { + "External id": 990920,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941354530.744, "dur": 12.849, + "args": { + "External id": 990921,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941354550.557, "dur": 0.854, + "args": { + "External id": 990922,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941354555.987, "dur": 13.834, + "args": { + "External id": 990923,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941354575.177, "dur": 1.129, + "args": { + "External id": 990924,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941354580.766, "dur": 11.858, + "args": { + "External id": 990925,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941354596.774, "dur": 1.814, + "args": { + "External id": 990926,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941354603.091, "dur": 12.871, + "args": { + "External id": 990927,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941354623.338, "dur": 0.842, + "args": { + "External id": 990928,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941354628.967, "dur": 11.314, + "args": { + "External id": 990929,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941354644.994, "dur": 0.977, + "args": { + "External id": 990930,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941354650.435, "dur": 12.279, + "args": { + "External id": 990931,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941354667.468, "dur": 0.856, + "args": { + "External id": 990932,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941354672.361, "dur": 12.005, + "args": { + "External id": 990933,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941354774.893, "dur": 2956.360, + "args": { + "External id": 990934,"Record function id": 0, "Ev Idx": 8917 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 2338706, "tid": 2379422, + "ts": 6345941354794.942, "dur": 1076.372, + "args": { + "External id": 990935,"Record function id": 0, "Ev Idx": 8918 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338706, "tid": 2379422, + "ts": 6345941354810.496, "dur": 385.934, + "args": { + "External id": 990936,"Record function id": 0, "Ev Idx": 8919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941354893.881, "dur": 6.321, + "args": { + "External id": 990937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941354903.349, "dur": 0.830, + "args": { + "External id": 990938,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941354905.887, "dur": 0.927, + "args": { + "External id": 990939,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941354908.720, "dur": 0.843, + "args": { + "External id": 990940,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941354913.154, "dur": 0.825, + "args": { + "External id": 990941,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941354915.322, "dur": 0.847, + "args": { + "External id": 990942,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941354917.659, "dur": 0.811, + "args": { + "External id": 990943,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941354920.034, "dur": 1.180, + "args": { + "External id": 990944,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941354924.790, "dur": 2.953, + "args": { + "External id": 990945,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941354929.552, "dur": 0.882, + "args": { + "External id": 990946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941354948.138, "dur": 211.598, + "args": { + "External id": 990947,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941354965.820, "dur": 187.838, + "args": { + "External id": 990948,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941354984.515, "dur": 15.863, + "args": { + "External id": 990949,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941355003.929, "dur": 121.520, + "args": { + "External id": 990950,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941355026.836, "dur": 98.206, + "args": { + "External id": 990951,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355031.387, "dur": 6.696, + "args": { + "External id": 990952,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941355039.923, "dur": 84.106, + "args": { + "External id": 990953,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8936 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 2338706, "tid": 2379422, + "ts": 6345941355290.351, "dur": 572.733, + "args": { + "External id": 990954,"Record function id": 0, "Ev Idx": 8937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338706, "tid": 2379422, + "ts": 6345941355308.495, "dur": 541.552, + "args": { + "External id": 990955,"Record function id": 0, "Ev Idx": 8938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941355371.787, "dur": 5.803, + "args": { + "External id": 990956,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941355395.124, "dur": 29.912, + "args": { + "External id": 990957,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355399.704, "dur": 1.753, + "args": { + "External id": 990958,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355403.523, "dur": 1.842, + "args": { + "External id": 990959,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355406.888, "dur": 0.555, + "args": { + "External id": 990960,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355408.783, "dur": 2.607, + "args": { + "External id": 990961,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355413.187, "dur": 0.408, + "args": { + "External id": 990962,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355415.121, "dur": 0.535, + "args": { + "External id": 990963,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355416.945, "dur": 0.559, + "args": { + "External id": 990964,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355419.112, "dur": 0.559, + "args": { + "External id": 990965,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355420.900, "dur": 0.570, + "args": { + "External id": 990966,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941355435.873, "dur": 42.374, + "args": { + "External id": 990967,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345941355512.283, "dur": 106.793, + "args": { + "External id": 990968,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941355522.664, "dur": 3.446, + "args": { + "External id": 990969,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345941355532.015, "dur": 10.189, + "args": { + "External id": 990970,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345941355536.417, "dur": 5.325, + "args": { + "External id": 990971,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355539.884, "dur": 0.494, + "args": { + "External id": 990972,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941355549.592, "dur": 23.092, + "args": { + "External id": 990973,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355551.263, "dur": 2.666, + "args": { + "External id": 990974,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355555.253, "dur": 0.795, + "args": { + "External id": 990975,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355557.300, "dur": 0.736, + "args": { + "External id": 990976,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355559.112, "dur": 0.398, + "args": { + "External id": 990977,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355560.951, "dur": 0.400, + "args": { + "External id": 990978,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355562.786, "dur": 0.305, + "args": { + "External id": 990979,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355564.171, "dur": 0.347, + "args": { + "External id": 990980,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355565.535, "dur": 0.326, + "args": { + "External id": 990981,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941355567.173, "dur": 2.651, + "args": { + "External id": 990982,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941355581.942, "dur": 30.011, + "args": { + "External id": 990983,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941355662.365, "dur": 118.349, + "args": { + "External id": 990984,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941355694.019, "dur": 83.429, + "args": { + "External id": 990985,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8968, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941355703.225, "dur": 69.898, + "args": { + "External id": 990986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941355798.282, "dur": 1.973, + "args": { + "External id": 990987,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8970, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941355878.263, "dur": 1825.854, + "args": { + "External id": 990988,"Sequence number": 10552505, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8971 + } + }, + { + "ph": "f", "id": 414, "pid": 2338706, "tid": 2379422, "ts": 6345941355878.263, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941355987.905, "dur": 160.846, + "args": { + "External id": 990989,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941356200.387, "dur": 39.437, + "args": { + "External id": 990990,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345941356260.945, "dur": 53.794, + "args": { + "External id": 990991,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941356325.716, "dur": 32.380, + "args": { + "External id": 990992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941356364.774, "dur": 32.419, + "args": { + "External id": 990993,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941356403.676, "dur": 27.195, + "args": { + "External id": 990994,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941356440.560, "dur": 29.030, + "args": { + "External id": 990995,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941356495.479, "dur": 22.841, + "args": { + "External id": 990996,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941356538.237, "dur": 27.199, + "args": { + "External id": 990997,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941356591.858, "dur": 21.445, + "args": { + "External id": 990998,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941356632.347, "dur": 16.828, + "args": { + "External id": 990999,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941356660.254, "dur": 38.871, + "args": { + "External id": 991000,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941356703.161, "dur": 34.183, + "args": { + "External id": 991001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345941356766.551, "dur": 267.824, + "args": { + "External id": 991002,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941356847.575, "dur": 6.114, + "args": { + "External id": 991003,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941356856.203, "dur": 3.001, + "args": { + "External id": 991004,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941356860.755, "dur": 2.461, + "args": { + "External id": 991005,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941356864.515, "dur": 5.005, + "args": { + "External id": 991006,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941356912.760, "dur": 5.364, + "args": { + "External id": 991007,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941356914.824, "dur": 3.113, + "args": { + "External id": 991008,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941356919.985, "dur": 31.654, + "args": { + "External id": 991009,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941356925.741, "dur": 1.796, + "args": { + "External id": 991010,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941356953.115, "dur": 2.370, + "args": { + "External id": 991011,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941356954.607, "dur": 0.788, + "args": { + "External id": 991012,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941356956.638, "dur": 14.832, + "args": { + "External id": 991013,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941356958.731, "dur": 0.584, + "args": { + "External id": 991014,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941357108.697, "dur": 30.022, + "args": { + "External id": 991015,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941357156.696, "dur": 17.162, + "args": { + "External id": 991016,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941357183.929, "dur": 61.217, + "args": { + "External id": 991017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941357259.066, "dur": 43.582, + "args": { + "External id": 991018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941357316.648, "dur": 20.169, + "args": { + "External id": 991019,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941357345.383, "dur": 34.816, + "args": { + "External id": 991020,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941357388.457, "dur": 29.413, + "args": { + "External id": 991021,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941357425.573, "dur": 31.466, + "args": { + "External id": 991022,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345941357476.835, "dur": 23.122, + "args": { + "External id": 991023,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941357516.750, "dur": 23.328, + "args": { + "External id": 991024,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941357579.355, "dur": 19.765, + "args": { + "External id": 991025,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941357615.064, "dur": 14.713, + "args": { + "External id": 991026,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345941357645.710, "dur": 23.792, + "args": { + "External id": 991027,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357754.667, "dur": 17.493, + "args": { + "External id": 991028,"Record function id": 0, "Ev Idx": 9011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357759.021, "dur": 12.214, + "args": { + "External id": 991029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357763.835, "dur": 6.190, + "args": { + "External id": 991030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357765.784, "dur": 4.109, + "args": { + "External id": 991031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357776.132, "dur": 6.275, + "args": { + "External id": 991032,"Record function id": 0, "Ev Idx": 9015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357777.997, "dur": 3.911, + "args": { + "External id": 991033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357778.601, "dur": 2.799, + "args": { + "External id": 991034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357779.943, "dur": 1.362, + "args": { + "External id": 991035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357785.702, "dur": 6.042, + "args": { + "External id": 991036,"Record function id": 0, "Ev Idx": 9019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357787.905, "dur": 3.366, + "args": { + "External id": 991037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357788.579, "dur": 2.103, + "args": { + "External id": 991038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357789.544, "dur": 1.064, + "args": { + "External id": 991039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357794.848, "dur": 5.477, + "args": { + "External id": 991040,"Record function id": 0, "Ev Idx": 9023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357796.239, "dur": 3.593, + "args": { + "External id": 991041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357797.157, "dur": 2.201, + "args": { + "External id": 991042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357798.099, "dur": 1.162, + "args": { + "External id": 991043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357803.521, "dur": 5.590, + "args": { + "External id": 991044,"Record function id": 0, "Ev Idx": 9027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357804.782, "dur": 3.876, + "args": { + "External id": 991045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357805.695, "dur": 2.543, + "args": { + "External id": 991046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357806.691, "dur": 1.473, + "args": { + "External id": 991047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357812.222, "dur": 8.338, + "args": { + "External id": 991048,"Record function id": 0, "Ev Idx": 9031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357813.925, "dur": 6.226, + "args": { + "External id": 991049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357814.406, "dur": 5.039, + "args": { + "External id": 991050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357815.473, "dur": 3.885, + "args": { + "External id": 991051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357823.761, "dur": 5.940, + "args": { + "External id": 991052,"Record function id": 0, "Ev Idx": 9035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357825.482, "dur": 3.802, + "args": { + "External id": 991053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357826.115, "dur": 2.612, + "args": { + "External id": 991054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357827.313, "dur": 1.338, + "args": { + "External id": 991055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357833.043, "dur": 5.612, + "args": { + "External id": 991056,"Record function id": 0, "Ev Idx": 9039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357835.050, "dur": 3.164, + "args": { + "External id": 991057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357835.468, "dur": 2.337, + "args": { + "External id": 991058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357836.300, "dur": 1.429, + "args": { + "External id": 991059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357842.196, "dur": 6.155, + "args": { + "External id": 991060,"Record function id": 0, "Ev Idx": 9043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941357843.977, "dur": 3.959, + "args": { + "External id": 991061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357844.568, "dur": 2.809, + "args": { + "External id": 991062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941357845.813, "dur": 1.479, + "args": { + "External id": 991063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941357853.026, "dur": 60720.045, + "args": { + "External id": 991064,"Record function id": 0, "Sequence number": 10552504, "Fwd thread id": 1, "Ev Idx": 9047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941357854.764, "dur": 60708.859, + "args": { + "External id": 991065,"Sequence number": 10552504, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9048 + } + }, + { + "ph": "f", "id": 415, "pid": 2338706, "tid": 2379422, "ts": 6345941357854.764, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 2338706, "tid": 2379422, + "ts": 6345941357885.835, "dur": 42.217, + "args": { + "External id": 991066,"Record function id": 0, "Ev Idx": 9049 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 2338706, "tid": 2379422, + "ts": 6345941357936.748, "dur": 66.050, + "args": { + "External id": 991067,"Record function id": 0, "Ev Idx": 9050 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 2338706, "tid": 2379422, + "ts": 6345941358026.940, "dur": 60528.632, + "args": { + "External id": 991068,"Record function id": 0, "Ev Idx": 9051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941358158.636, "dur": 7.683, + "args": { + "External id": 991069,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941358178.044, "dur": 8.668, + "args": { + "External id": 991070,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941358202.690, "dur": 59382.187, + "args": { + "External id": 991071,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941358217.372, "dur": 59354.077, + "args": { + "External id": 991072,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941358327.995, "dur": 18.589, + "args": { + "External id": 991073,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941358369.837, "dur": 59160.751, + "args": { + "External id": 991074,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941358375.346, "dur": 59153.979, + "args": { + "External id": 991075,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941358380.183, "dur": 9.028, + "args": { + "External id": 991076,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941358391.516, "dur": 59132.717, + "args": { + "External id": 991077,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941417698.349, "dur": 13.811, + "args": { + "External id": 991078,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941417703.830, "dur": 8.021, + "args": { + "External id": 991079,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941417743.226, "dur": 410.123, + "args": { + "External id": 991080,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941417772.274, "dur": 375.596, + "args": { + "External id": 991081,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9064, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941417785.635, "dur": 356.123, + "args": { + "External id": 991082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941418174.390, "dur": 2.662, + "args": { + "External id": 991083,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9066, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941418243.493, "dur": 7.067, + "args": { + "External id": 991084,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941418263.223, "dur": 36.744, + "args": { + "External id": 991085,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941418310.744, "dur": 3.567, + "args": { + "External id": 991086,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941418320.301, "dur": 15.480, + "args": { + "External id": 991087,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941418341.423, "dur": 1.046, + "args": { + "External id": 991088,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941418347.109, "dur": 11.870, + "args": { + "External id": 991089,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941418363.865, "dur": 0.754, + "args": { + "External id": 991090,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941418369.309, "dur": 13.207, + "args": { + "External id": 991091,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941418387.201, "dur": 0.905, + "args": { + "External id": 991092,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941418392.651, "dur": 11.868, + "args": { + "External id": 991093,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941418408.679, "dur": 1.125, + "args": { + "External id": 991094,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941418414.051, "dur": 12.289, + "args": { + "External id": 991095,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941418430.505, "dur": 1.265, + "args": { + "External id": 991096,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941418436.244, "dur": 11.172, + "args": { + "External id": 991097,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941418454.099, "dur": 1.213, + "args": { + "External id": 991098,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941418459.446, "dur": 12.500, + "args": { + "External id": 991099,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941418476.192, "dur": 1.269, + "args": { + "External id": 991100,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941418481.854, "dur": 11.416, + "args": { + "External id": 991101,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941418587.802, "dur": 3037.019, + "args": { + "External id": 991102,"Record function id": 0, "Ev Idx": 9085 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 2338706, "tid": 2379422, + "ts": 6345941418609.546, "dur": 1116.767, + "args": { + "External id": 991103,"Record function id": 0, "Ev Idx": 9086 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338706, "tid": 2379422, + "ts": 6345941418624.901, "dur": 313.324, + "args": { + "External id": 991104,"Record function id": 0, "Ev Idx": 9087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941418708.452, "dur": 6.327, + "args": { + "External id": 991105,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941418717.881, "dur": 0.971, + "args": { + "External id": 991106,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941418721.303, "dur": 1.065, + "args": { + "External id": 991107,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941418724.168, "dur": 0.969, + "args": { + "External id": 991108,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941418727.525, "dur": 0.991, + "args": { + "External id": 991109,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941418729.899, "dur": 1.144, + "args": { + "External id": 991110,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941418732.596, "dur": 0.935, + "args": { + "External id": 991111,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941418737.344, "dur": 2.155, + "args": { + "External id": 991112,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941418741.004, "dur": 2.442, + "args": { + "External id": 991113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941418744.785, "dur": 0.800, + "args": { + "External id": 991114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941418763.916, "dur": 145.674, + "args": { + "External id": 991115,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941418781.307, "dur": 123.777, + "args": { + "External id": 991116,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941418797.261, "dur": 15.861, + "args": { + "External id": 991117,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941418817.161, "dur": 61.117, + "args": { + "External id": 991118,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941418820.264, "dur": 57.636, + "args": { + "External id": 991119,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941418824.684, "dur": 5.287, + "args": { + "External id": 991120,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941418832.115, "dur": 45.073, + "args": { + "External id": 991121,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 2338706, "tid": 2379422, + "ts": 6345941419045.285, "dur": 673.424, + "args": { + "External id": 991122,"Record function id": 0, "Ev Idx": 9105 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338706, "tid": 2379422, + "ts": 6345941419097.053, "dur": 608.732, + "args": { + "External id": 991123,"Record function id": 0, "Ev Idx": 9106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941419166.284, "dur": 6.940, + "args": { + "External id": 991124,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941419190.561, "dur": 31.245, + "args": { + "External id": 991125,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419195.840, "dur": 1.881, + "args": { + "External id": 991126,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419200.403, "dur": 0.565, + "args": { + "External id": 991127,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419203.316, "dur": 0.665, + "args": { + "External id": 991128,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419205.218, "dur": 2.695, + "args": { + "External id": 991129,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419209.692, "dur": 0.482, + "args": { + "External id": 991130,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419211.790, "dur": 0.257, + "args": { + "External id": 991131,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419213.257, "dur": 0.676, + "args": { + "External id": 991132,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419215.528, "dur": 0.506, + "args": { + "External id": 991133,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419217.612, "dur": 0.335, + "args": { + "External id": 991134,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941419233.560, "dur": 43.979, + "args": { + "External id": 991135,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345941419308.939, "dur": 161.751, + "args": { + "External id": 991136,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941419320.158, "dur": 3.047, + "args": { + "External id": 991137,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345941419328.846, "dur": 10.658, + "args": { + "External id": 991138,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345941419333.317, "dur": 5.752, + "args": { + "External id": 991139,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419337.199, "dur": 0.792, + "args": { + "External id": 991140,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941419346.636, "dur": 75.050, + "args": { + "External id": 991141,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419390.877, "dur": 9.360, + "args": { + "External id": 991142,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419402.108, "dur": 0.493, + "args": { + "External id": 991143,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419403.950, "dur": 0.688, + "args": { + "External id": 991144,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419406.352, "dur": 0.587, + "args": { + "External id": 991145,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419408.713, "dur": 0.438, + "args": { + "External id": 991146,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419410.517, "dur": 0.467, + "args": { + "External id": 991147,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419412.552, "dur": 0.447, + "args": { + "External id": 991148,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419414.577, "dur": 0.278, + "args": { + "External id": 991149,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941419416.020, "dur": 2.544, + "args": { + "External id": 991150,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941419432.990, "dur": 29.330, + "args": { + "External id": 991151,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941419520.381, "dur": 113.565, + "args": { + "External id": 991152,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941419548.446, "dur": 82.175, + "args": { + "External id": 991153,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9136, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941419558.300, "dur": 67.673, + "args": { + "External id": 991154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941419650.655, "dur": 1.882, + "args": { + "External id": 991155,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9138, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941419733.418, "dur": 1862.654, + "args": { + "External id": 991156,"Sequence number": 10552503, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9139 + } + }, + { + "ph": "f", "id": 416, "pid": 2338706, "tid": 2379422, "ts": 6345941419733.418, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941419843.646, "dur": 101.395, + "args": { + "External id": 991157,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941419987.224, "dur": 59.686, + "args": { + "External id": 991158,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345941420106.846, "dur": 59.143, + "args": { + "External id": 991159,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941420177.101, "dur": 32.345, + "args": { + "External id": 991160,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941420215.873, "dur": 32.484, + "args": { + "External id": 991161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941420255.162, "dur": 28.130, + "args": { + "External id": 991162,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941420292.455, "dur": 28.927, + "args": { + "External id": 991163,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941420349.380, "dur": 26.124, + "args": { + "External id": 991164,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941420399.629, "dur": 30.667, + "args": { + "External id": 991165,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941420452.378, "dur": 22.058, + "args": { + "External id": 991166,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941420490.170, "dur": 17.026, + "args": { + "External id": 991167,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941420518.804, "dur": 38.043, + "args": { + "External id": 991168,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941420561.126, "dur": 34.923, + "args": { + "External id": 991169,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345941420626.114, "dur": 257.320, + "args": { + "External id": 991170,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941420706.580, "dur": 7.016, + "args": { + "External id": 991171,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941420716.083, "dur": 3.204, + "args": { + "External id": 991172,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941420721.203, "dur": 2.301, + "args": { + "External id": 991173,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941420724.754, "dur": 4.759, + "args": { + "External id": 991174,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941420777.843, "dur": 5.295, + "args": { + "External id": 991175,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941420780.166, "dur": 2.810, + "args": { + "External id": 991176,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941420785.217, "dur": 33.686, + "args": { + "External id": 991177,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941420790.623, "dur": 2.182, + "args": { + "External id": 991178,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941420820.494, "dur": 1.879, + "args": { + "External id": 991179,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941420821.564, "dur": 0.714, + "args": { + "External id": 991180,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941420823.816, "dur": 17.519, + "args": { + "External id": 991181,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941420826.218, "dur": 0.894, + "args": { + "External id": 991182,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941420920.907, "dur": 28.828, + "args": { + "External id": 991183,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941420967.317, "dur": 18.076, + "args": { + "External id": 991184,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941420994.887, "dur": 97.336, + "args": { + "External id": 991185,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941421103.983, "dur": 59.741, + "args": { + "External id": 991186,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941421182.482, "dur": 29.256, + "args": { + "External id": 991187,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941421220.907, "dur": 37.619, + "args": { + "External id": 991188,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941421266.628, "dur": 27.951, + "args": { + "External id": 991189,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941421329.586, "dur": 34.535, + "args": { + "External id": 991190,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345941421390.159, "dur": 27.066, + "args": { + "External id": 991191,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941421436.504, "dur": 25.569, + "args": { + "External id": 991192,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941421477.777, "dur": 17.360, + "args": { + "External id": 991193,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941421511.622, "dur": 13.636, + "args": { + "External id": 991194,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345941421540.174, "dur": 17.204, + "args": { + "External id": 991195,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421651.419, "dur": 16.715, + "args": { + "External id": 991196,"Record function id": 0, "Ev Idx": 9179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421655.296, "dur": 11.969, + "args": { + "External id": 991197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421659.784, "dur": 6.707, + "args": { + "External id": 991198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421661.779, "dur": 4.580, + "args": { + "External id": 991199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421672.152, "dur": 5.946, + "args": { + "External id": 991200,"Record function id": 0, "Ev Idx": 9183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421673.713, "dur": 3.777, + "args": { + "External id": 991201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421674.433, "dur": 2.575, + "args": { + "External id": 991202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421675.473, "dur": 1.436, + "args": { + "External id": 991203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421681.342, "dur": 5.056, + "args": { + "External id": 991204,"Record function id": 0, "Ev Idx": 9187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421682.842, "dur": 3.115, + "args": { + "External id": 991205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421683.368, "dur": 2.163, + "args": { + "External id": 991206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421684.290, "dur": 1.150, + "args": { + "External id": 991207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421689.613, "dur": 4.689, + "args": { + "External id": 991208,"Record function id": 0, "Ev Idx": 9191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421691.209, "dur": 2.652, + "args": { + "External id": 991209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421691.730, "dur": 1.732, + "args": { + "External id": 991210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421692.420, "dur": 0.960, + "args": { + "External id": 991211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421697.495, "dur": 5.678, + "args": { + "External id": 991212,"Record function id": 0, "Ev Idx": 9195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421699.053, "dur": 3.655, + "args": { + "External id": 991213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421699.705, "dur": 2.554, + "args": { + "External id": 991214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421700.858, "dur": 1.318, + "args": { + "External id": 991215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421706.476, "dur": 7.857, + "args": { + "External id": 991216,"Record function id": 0, "Ev Idx": 9199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421708.033, "dur": 5.833, + "args": { + "External id": 991217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421708.473, "dur": 4.731, + "args": { + "External id": 991218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421709.402, "dur": 3.707, + "args": { + "External id": 991219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421717.606, "dur": 5.899, + "args": { + "External id": 991220,"Record function id": 0, "Ev Idx": 9203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421719.339, "dur": 3.761, + "args": { + "External id": 991221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421720.007, "dur": 2.511, + "args": { + "External id": 991222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421721.312, "dur": 1.102, + "args": { + "External id": 991223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421727.146, "dur": 4.794, + "args": { + "External id": 991224,"Record function id": 0, "Ev Idx": 9207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421728.447, "dur": 3.011, + "args": { + "External id": 991225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421729.081, "dur": 1.859, + "args": { + "External id": 991226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421729.685, "dur": 1.181, + "args": { + "External id": 991227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421735.382, "dur": 3.741, + "args": { + "External id": 991228,"Record function id": 0, "Ev Idx": 9211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941421736.417, "dur": 2.303, + "args": { + "External id": 991229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421736.941, "dur": 1.223, + "args": { + "External id": 991230,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941421737.220, "dur": 0.873, + "args": { + "External id": 991231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941421743.526, "dur": 60632.666, + "args": { + "External id": 991232,"Record function id": 0, "Sequence number": 10552502, "Fwd thread id": 1, "Ev Idx": 9215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941421796.538, "dur": 60569.906, + "args": { + "External id": 991233,"Sequence number": 10552502, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9216 + } + }, + { + "ph": "f", "id": 417, "pid": 2338706, "tid": 2379422, "ts": 6345941421796.538, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 2338706, "tid": 2379422, + "ts": 6345941421829.400, "dur": 41.477, + "args": { + "External id": 991234,"Record function id": 0, "Ev Idx": 9217 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 2338706, "tid": 2379422, + "ts": 6345941421879.011, "dur": 67.296, + "args": { + "External id": 991235,"Record function id": 0, "Ev Idx": 9218 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 2338706, "tid": 2379422, + "ts": 6345941421952.967, "dur": 60405.582, + "args": { + "External id": 991236,"Record function id": 0, "Ev Idx": 9219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941422098.552, "dur": 9.049, + "args": { + "External id": 991237,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941422120.121, "dur": 5.301, + "args": { + "External id": 991238,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941422142.261, "dur": 59249.133, + "args": { + "External id": 991239,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941422157.787, "dur": 59220.739, + "args": { + "External id": 991240,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941422306.883, "dur": 19.568, + "args": { + "External id": 991241,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941422345.649, "dur": 58988.935, + "args": { + "External id": 991242,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941422351.204, "dur": 58982.292, + "args": { + "External id": 991243,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941422356.011, "dur": 8.606, + "args": { + "External id": 991244,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941422366.776, "dur": 58961.853, + "args": { + "External id": 991245,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941481505.185, "dur": 14.382, + "args": { + "External id": 991246,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941481510.389, "dur": 8.681, + "args": { + "External id": 991247,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941481551.186, "dur": 355.769, + "args": { + "External id": 991248,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941481580.219, "dur": 322.072, + "args": { + "External id": 991249,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9232, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941481593.569, "dur": 302.583, + "args": { + "External id": 991250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941481926.730, "dur": 2.653, + "args": { + "External id": 991251,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9234, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941481987.566, "dur": 6.700, + "args": { + "External id": 991252,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941482023.980, "dur": 70.901, + "args": { + "External id": 991253,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482109.811, "dur": 5.234, + "args": { + "External id": 991254,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941482122.491, "dur": 16.306, + "args": { + "External id": 991255,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482144.528, "dur": 1.063, + "args": { + "External id": 991256,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941482150.775, "dur": 11.511, + "args": { + "External id": 991257,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482167.492, "dur": 0.877, + "args": { + "External id": 991258,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941482172.911, "dur": 13.044, + "args": { + "External id": 991259,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482189.977, "dur": 1.043, + "args": { + "External id": 991260,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941482196.369, "dur": 11.471, + "args": { + "External id": 991261,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482212.111, "dur": 1.151, + "args": { + "External id": 991262,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941482217.677, "dur": 12.751, + "args": { + "External id": 991263,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482234.975, "dur": 0.702, + "args": { + "External id": 991264,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941482239.960, "dur": 11.312, + "args": { + "External id": 991265,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482255.304, "dur": 0.991, + "args": { + "External id": 991266,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941482260.153, "dur": 13.425, + "args": { + "External id": 991267,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482277.300, "dur": 1.049, + "args": { + "External id": 991268,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941482284.286, "dur": 11.265, + "args": { + "External id": 991269,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941482393.539, "dur": 2935.407, + "args": { + "External id": 991270,"Record function id": 0, "Ev Idx": 9253 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 2338706, "tid": 2379422, + "ts": 6345941482416.776, "dur": 1066.428, + "args": { + "External id": 991271,"Record function id": 0, "Ev Idx": 9254 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338706, "tid": 2379422, + "ts": 6345941482433.638, "dur": 323.544, + "args": { + "External id": 991272,"Record function id": 0, "Ev Idx": 9255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941482523.052, "dur": 6.722, + "args": { + "External id": 991273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941482532.939, "dur": 0.922, + "args": { + "External id": 991274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941482536.093, "dur": 0.818, + "args": { + "External id": 991275,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941482538.484, "dur": 0.951, + "args": { + "External id": 991276,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941482540.877, "dur": 0.851, + "args": { + "External id": 991277,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941482543.237, "dur": 0.783, + "args": { + "External id": 991278,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941482545.460, "dur": 0.939, + "args": { + "External id": 991279,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941482550.309, "dur": 2.154, + "args": { + "External id": 991280,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941482554.049, "dur": 3.167, + "args": { + "External id": 991281,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941482558.629, "dur": 0.754, + "args": { + "External id": 991282,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941482577.575, "dur": 148.782, + "args": { + "External id": 991283,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941482595.551, "dur": 126.308, + "args": { + "External id": 991284,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941482613.243, "dur": 15.391, + "args": { + "External id": 991285,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941482632.489, "dur": 62.030, + "args": { + "External id": 991286,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941482635.236, "dur": 58.914, + "args": { + "External id": 991287,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482639.245, "dur": 5.370, + "args": { + "External id": 991288,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941482646.499, "dur": 47.041, + "args": { + "External id": 991289,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9272 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 2338706, "tid": 2379422, + "ts": 6345941482845.110, "dur": 630.745, + "args": { + "External id": 991290,"Record function id": 0, "Ev Idx": 9273 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338706, "tid": 2379422, + "ts": 6345941482861.202, "dur": 602.300, + "args": { + "External id": 991291,"Record function id": 0, "Ev Idx": 9274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941482919.379, "dur": 5.132, + "args": { + "External id": 991292,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941482940.344, "dur": 28.154, + "args": { + "External id": 991293,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482945.239, "dur": 1.307, + "args": { + "External id": 991294,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482948.806, "dur": 0.662, + "args": { + "External id": 991295,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482951.286, "dur": 0.490, + "args": { + "External id": 991296,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482952.680, "dur": 2.284, + "args": { + "External id": 991297,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482956.552, "dur": 0.603, + "args": { + "External id": 991298,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482959.035, "dur": 0.318, + "args": { + "External id": 991299,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482960.273, "dur": 0.456, + "args": { + "External id": 991300,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482962.645, "dur": 0.400, + "args": { + "External id": 991301,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941482964.393, "dur": 0.471, + "args": { + "External id": 991302,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941482979.120, "dur": 60.643, + "args": { + "External id": 991303,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345941483108.908, "dur": 120.604, + "args": { + "External id": 991304,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941483120.840, "dur": 5.058, + "args": { + "External id": 991305,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345941483131.725, "dur": 11.392, + "args": { + "External id": 991306,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345941483136.061, "dur": 6.630, + "args": { + "External id": 991307,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941483140.124, "dur": 0.876, + "args": { + "External id": 991308,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941483150.375, "dur": 25.587, + "args": { + "External id": 991309,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941483152.269, "dur": 2.771, + "args": { + "External id": 991310,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941483156.728, "dur": 0.450, + "args": { + "External id": 991311,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941483158.942, "dur": 0.661, + "args": { + "External id": 991312,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941483160.661, "dur": 0.584, + "args": { + "External id": 991313,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941483162.519, "dur": 0.622, + "args": { + "External id": 991314,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941483164.608, "dur": 0.446, + "args": { + "External id": 991315,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941483166.134, "dur": 0.626, + "args": { + "External id": 991316,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941483168.440, "dur": 0.394, + "args": { + "External id": 991317,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941483170.574, "dur": 2.815, + "args": { + "External id": 991318,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941483188.001, "dur": 33.802, + "args": { + "External id": 991319,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941483278.158, "dur": 115.195, + "args": { + "External id": 991320,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941483303.285, "dur": 86.520, + "args": { + "External id": 991321,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9304, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941483313.602, "dur": 71.564, + "args": { + "External id": 991322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941483410.125, "dur": 2.020, + "args": { + "External id": 991323,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9306, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941483491.032, "dur": 1813.647, + "args": { + "External id": 991324,"Sequence number": 10552501, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9307 + } + }, + { + "ph": "f", "id": 418, "pid": 2338706, "tid": 2379422, "ts": 6345941483491.032, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941483603.436, "dur": 102.653, + "args": { + "External id": 991325,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941483747.418, "dur": 39.724, + "args": { + "External id": 991326,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345941483807.871, "dur": 47.860, + "args": { + "External id": 991327,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941483865.489, "dur": 31.214, + "args": { + "External id": 991328,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941483902.936, "dur": 32.547, + "args": { + "External id": 991329,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941483942.492, "dur": 27.429, + "args": { + "External id": 991330,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941483979.440, "dur": 48.938, + "args": { + "External id": 991331,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941484092.516, "dur": 29.109, + "args": { + "External id": 991332,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941484143.518, "dur": 28.957, + "args": { + "External id": 991333,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941484197.556, "dur": 19.221, + "args": { + "External id": 991334,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941484231.299, "dur": 15.787, + "args": { + "External id": 991335,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941484258.987, "dur": 42.429, + "args": { + "External id": 991336,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941484305.378, "dur": 34.407, + "args": { + "External id": 991337,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345941484370.625, "dur": 246.007, + "args": { + "External id": 991338,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941484450.732, "dur": 6.219, + "args": { + "External id": 991339,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941484459.178, "dur": 3.305, + "args": { + "External id": 991340,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941484463.729, "dur": 2.507, + "args": { + "External id": 991341,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941484467.453, "dur": 4.441, + "args": { + "External id": 991342,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941484515.365, "dur": 4.996, + "args": { + "External id": 991343,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941484517.217, "dur": 2.994, + "args": { + "External id": 991344,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941484522.252, "dur": 31.971, + "args": { + "External id": 991345,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941484527.645, "dur": 2.017, + "args": { + "External id": 991346,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941484555.545, "dur": 1.819, + "args": { + "External id": 991347,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941484556.621, "dur": 0.658, + "args": { + "External id": 991348,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941484558.565, "dur": 14.938, + "args": { + "External id": 991349,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941484561.501, "dur": 0.452, + "args": { + "External id": 991350,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941484652.077, "dur": 28.611, + "args": { + "External id": 991351,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941484698.416, "dur": 16.063, + "args": { + "External id": 991352,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941484722.804, "dur": 40.207, + "args": { + "External id": 991353,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941484770.583, "dur": 38.133, + "args": { + "External id": 991354,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941484840.375, "dur": 24.415, + "args": { + "External id": 991355,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941484872.619, "dur": 33.233, + "args": { + "External id": 991356,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941484914.987, "dur": 29.226, + "args": { + "External id": 991357,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941484952.436, "dur": 46.367, + "args": { + "External id": 991358,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345941485048.303, "dur": 66.863, + "args": { + "External id": 991359,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941485138.496, "dur": 29.193, + "args": { + "External id": 991360,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941485185.023, "dur": 17.949, + "args": { + "External id": 991361,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941485220.953, "dur": 14.903, + "args": { + "External id": 991362,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345941485253.543, "dur": 17.727, + "args": { + "External id": 991363,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485351.868, "dur": 17.301, + "args": { + "External id": 991364,"Record function id": 0, "Ev Idx": 9347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485355.825, "dur": 12.336, + "args": { + "External id": 991365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485360.721, "dur": 6.405, + "args": { + "External id": 991366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485362.385, "dur": 4.623, + "args": { + "External id": 991367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485372.987, "dur": 6.332, + "args": { + "External id": 991368,"Record function id": 0, "Ev Idx": 9351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485374.909, "dur": 3.959, + "args": { + "External id": 991369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485375.687, "dur": 2.534, + "args": { + "External id": 991370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485376.670, "dur": 1.464, + "args": { + "External id": 991371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485382.550, "dur": 5.720, + "args": { + "External id": 991372,"Record function id": 0, "Ev Idx": 9355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485384.269, "dur": 3.570, + "args": { + "External id": 991373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485384.747, "dur": 2.540, + "args": { + "External id": 991374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485385.941, "dur": 1.249, + "args": { + "External id": 991375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485391.487, "dur": 7.987, + "args": { + "External id": 991376,"Record function id": 0, "Ev Idx": 9359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485393.272, "dur": 5.771, + "args": { + "External id": 991377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485393.763, "dur": 4.837, + "args": { + "External id": 991378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485395.102, "dur": 3.409, + "args": { + "External id": 991379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485402.631, "dur": 5.604, + "args": { + "External id": 991380,"Record function id": 0, "Ev Idx": 9363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485403.914, "dur": 3.900, + "args": { + "External id": 991381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485404.378, "dur": 2.889, + "args": { + "External id": 991382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485405.469, "dur": 1.727, + "args": { + "External id": 991383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485411.410, "dur": 5.631, + "args": { + "External id": 991384,"Record function id": 0, "Ev Idx": 9367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485413.329, "dur": 3.305, + "args": { + "External id": 991385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485413.958, "dur": 2.243, + "args": { + "External id": 991386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485414.855, "dur": 1.262, + "args": { + "External id": 991387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485420.284, "dur": 5.648, + "args": { + "External id": 991388,"Record function id": 0, "Ev Idx": 9371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485421.987, "dur": 3.541, + "args": { + "External id": 991389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485422.455, "dur": 2.626, + "args": { + "External id": 991390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485423.674, "dur": 1.325, + "args": { + "External id": 991391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485428.979, "dur": 6.273, + "args": { + "External id": 991392,"Record function id": 0, "Ev Idx": 9375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485430.456, "dur": 4.390, + "args": { + "External id": 991393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485431.363, "dur": 2.939, + "args": { + "External id": 991394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485432.741, "dur": 1.481, + "args": { + "External id": 991395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485438.762, "dur": 6.059, + "args": { + "External id": 991396,"Record function id": 0, "Ev Idx": 9379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941485440.249, "dur": 4.145, + "args": { + "External id": 991397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485440.735, "dur": 3.088, + "args": { + "External id": 991398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941485442.341, "dur": 1.409, + "args": { + "External id": 991399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941485449.186, "dur": 61273.582, + "args": { + "External id": 991400,"Record function id": 0, "Sequence number": 10552500, "Fwd thread id": 1, "Ev Idx": 9383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941485450.554, "dur": 61262.720, + "args": { + "External id": 991401,"Sequence number": 10552500, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9384 + } + }, + { + "ph": "f", "id": 419, "pid": 2338706, "tid": 2379422, "ts": 6345941485450.554, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 2338706, "tid": 2379422, + "ts": 6345941485484.846, "dur": 44.247, + "args": { + "External id": 991402,"Record function id": 0, "Ev Idx": 9385 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 2338706, "tid": 2379422, + "ts": 6345941485538.019, "dur": 67.761, + "args": { + "External id": 991403,"Record function id": 0, "Ev Idx": 9386 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 2338706, "tid": 2379422, + "ts": 6345941485612.042, "dur": 61093.439, + "args": { + "External id": 991404,"Record function id": 0, "Ev Idx": 9387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941485703.407, "dur": 6.395, + "args": { + "External id": 991405,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941485720.207, "dur": 4.829, + "args": { + "External id": 991406,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941485739.252, "dur": 59948.241, + "args": { + "External id": 991407,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941485754.399, "dur": 59919.450, + "args": { + "External id": 991408,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941485858.036, "dur": 18.287, + "args": { + "External id": 991409,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941485895.338, "dur": 59734.567, + "args": { + "External id": 991410,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941485898.128, "dur": 59730.875, + "args": { + "External id": 991411,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941485902.430, "dur": 11.305, + "args": { + "External id": 991412,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941485918.485, "dur": 59705.442, + "args": { + "External id": 991413,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941545798.366, "dur": 11.436, + "args": { + "External id": 991414,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941545802.014, "dur": 7.396, + "args": { + "External id": 991415,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941545840.962, "dur": 468.547, + "args": { + "External id": 991416,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941545873.661, "dur": 429.595, + "args": { + "External id": 991417,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9400, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941545885.327, "dur": 411.680, + "args": { + "External id": 991418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941546333.850, "dur": 2.852, + "args": { + "External id": 991419,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9402, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941546405.485, "dur": 7.141, + "args": { + "External id": 991420,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941546425.787, "dur": 36.116, + "args": { + "External id": 991421,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941546472.268, "dur": 1.779, + "args": { + "External id": 991422,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941546479.287, "dur": 14.280, + "args": { + "External id": 991423,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941546499.103, "dur": 1.189, + "args": { + "External id": 991424,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941546504.303, "dur": 11.913, + "args": { + "External id": 991425,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941546521.072, "dur": 1.168, + "args": { + "External id": 991426,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941546526.283, "dur": 13.815, + "args": { + "External id": 991427,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941546544.679, "dur": 0.922, + "args": { + "External id": 991428,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941546550.467, "dur": 11.436, + "args": { + "External id": 991429,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941546565.846, "dur": 1.595, + "args": { + "External id": 991430,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941546571.280, "dur": 11.915, + "args": { + "External id": 991431,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941546587.214, "dur": 3.012, + "args": { + "External id": 991432,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941546594.332, "dur": 9.845, + "args": { + "External id": 991433,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941546611.136, "dur": 0.894, + "args": { + "External id": 991434,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941546616.141, "dur": 10.365, + "args": { + "External id": 991435,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941546630.470, "dur": 0.964, + "args": { + "External id": 991436,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941546635.396, "dur": 9.840, + "args": { + "External id": 991437,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941546738.139, "dur": 2976.191, + "args": { + "External id": 991438,"Record function id": 0, "Ev Idx": 9421 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 2338706, "tid": 2379422, + "ts": 6345941546759.608, "dur": 1100.407, + "args": { + "External id": 991439,"Record function id": 0, "Ev Idx": 9422 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338706, "tid": 2379422, + "ts": 6345941546774.283, "dur": 380.585, + "args": { + "External id": 991440,"Record function id": 0, "Ev Idx": 9423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941546858.174, "dur": 4.352, + "args": { + "External id": 991441,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941546865.365, "dur": 1.050, + "args": { + "External id": 991442,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941546867.950, "dur": 1.003, + "args": { + "External id": 991443,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941546871.006, "dur": 0.769, + "args": { + "External id": 991444,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941546873.359, "dur": 1.069, + "args": { + "External id": 991445,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941546878.292, "dur": 2.818, + "args": { + "External id": 991446,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941546882.746, "dur": 0.844, + "args": { + "External id": 991447,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941546884.942, "dur": 2.148, + "args": { + "External id": 991448,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941546888.506, "dur": 0.943, + "args": { + "External id": 991449,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941546893.036, "dur": 0.816, + "args": { + "External id": 991450,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941546911.699, "dur": 205.138, + "args": { + "External id": 991451,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941546929.230, "dur": 181.342, + "args": { + "External id": 991452,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941546946.128, "dur": 15.987, + "args": { + "External id": 991453,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941546966.227, "dur": 80.218, + "args": { + "External id": 991454,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941546969.020, "dur": 77.032, + "args": { + "External id": 991455,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941546972.906, "dur": 5.429, + "args": { + "External id": 991456,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941546980.254, "dur": 64.752, + "args": { + "External id": 991457,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9440 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 2338706, "tid": 2379422, + "ts": 6345941547251.075, "dur": 601.383, + "args": { + "External id": 991458,"Record function id": 0, "Ev Idx": 9441 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338706, "tid": 2379422, + "ts": 6345941547270.220, "dur": 569.751, + "args": { + "External id": 991459,"Record function id": 0, "Ev Idx": 9442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941547333.336, "dur": 6.361, + "args": { + "External id": 991460,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941547355.726, "dur": 31.173, + "args": { + "External id": 991461,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547360.839, "dur": 3.893, + "args": { + "External id": 991462,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547367.126, "dur": 0.544, + "args": { + "External id": 991463,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547369.101, "dur": 0.441, + "args": { + "External id": 991464,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547370.766, "dur": 0.423, + "args": { + "External id": 991465,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547373.079, "dur": 0.433, + "args": { + "External id": 991466,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547374.921, "dur": 0.338, + "args": { + "External id": 991467,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547376.108, "dur": 0.435, + "args": { + "External id": 991468,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547378.277, "dur": 0.393, + "args": { + "External id": 991469,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547380.244, "dur": 2.683, + "args": { + "External id": 991470,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941547398.443, "dur": 43.663, + "args": { + "External id": 991471,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345941547472.655, "dur": 135.684, + "args": { + "External id": 991472,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941547483.268, "dur": 2.962, + "args": { + "External id": 991473,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345941547491.509, "dur": 10.539, + "args": { + "External id": 991474,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345941547496.035, "dur": 5.635, + "args": { + "External id": 991475,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547499.799, "dur": 0.742, + "args": { + "External id": 991476,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941547508.209, "dur": 47.716, + "args": { + "External id": 991477,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547510.297, "dur": 0.467, + "args": { + "External id": 991478,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547512.450, "dur": 0.393, + "args": { + "External id": 991479,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547514.600, "dur": 0.562, + "args": { + "External id": 991480,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547516.178, "dur": 0.539, + "args": { + "External id": 991481,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547518.729, "dur": 0.374, + "args": { + "External id": 991482,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547543.529, "dur": 2.806, + "args": { + "External id": 991483,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547548.275, "dur": 0.415, + "args": { + "External id": 991484,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547550.668, "dur": 0.320, + "args": { + "External id": 991485,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941547552.463, "dur": 0.760, + "args": { + "External id": 991486,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941547567.479, "dur": 33.105, + "args": { + "External id": 991487,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941547655.766, "dur": 116.529, + "args": { + "External id": 991488,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941547681.729, "dur": 86.801, + "args": { + "External id": 991489,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9472, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941547692.550, "dur": 71.920, + "args": { + "External id": 991490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941547788.464, "dur": 2.033, + "args": { + "External id": 991491,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9474, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941547866.773, "dur": 1822.476, + "args": { + "External id": 991492,"Sequence number": 10552499, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9475 + } + }, + { + "ph": "f", "id": 420, "pid": 2338706, "tid": 2379422, "ts": 6345941547866.773, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941547978.419, "dur": 158.053, + "args": { + "External id": 991493,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941548190.069, "dur": 41.730, + "args": { + "External id": 991494,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345941548250.754, "dur": 53.892, + "args": { + "External id": 991495,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941548314.509, "dur": 31.932, + "args": { + "External id": 991496,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941548352.619, "dur": 32.189, + "args": { + "External id": 991497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941548393.024, "dur": 27.934, + "args": { + "External id": 991498,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941548427.908, "dur": 28.875, + "args": { + "External id": 991499,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941548483.493, "dur": 23.140, + "args": { + "External id": 991500,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941548524.880, "dur": 29.147, + "args": { + "External id": 991501,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941548576.452, "dur": 18.975, + "args": { + "External id": 991502,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941548611.524, "dur": 16.577, + "args": { + "External id": 991503,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941548637.460, "dur": 35.593, + "args": { + "External id": 991504,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941548677.074, "dur": 32.580, + "args": { + "External id": 991505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345941548737.074, "dur": 254.786, + "args": { + "External id": 991506,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941548820.295, "dur": 8.576, + "args": { + "External id": 991507,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941548830.866, "dur": 3.215, + "args": { + "External id": 991508,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941548835.476, "dur": 2.545, + "args": { + "External id": 991509,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941548839.433, "dur": 2.495, + "args": { + "External id": 991510,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941548889.395, "dur": 4.903, + "args": { + "External id": 991511,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941548891.404, "dur": 2.717, + "args": { + "External id": 991512,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941548896.141, "dur": 32.706, + "args": { + "External id": 991513,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941548902.447, "dur": 1.718, + "args": { + "External id": 991514,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941548930.608, "dur": 2.218, + "args": { + "External id": 991515,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941548931.746, "dur": 0.970, + "args": { + "External id": 991516,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941548934.013, "dur": 14.341, + "args": { + "External id": 991517,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941548936.035, "dur": 0.544, + "args": { + "External id": 991518,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941549047.796, "dur": 77.847, + "args": { + "External id": 991519,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941549158.211, "dur": 17.631, + "args": { + "External id": 991520,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941549186.571, "dur": 49.343, + "args": { + "External id": 991521,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941549243.757, "dur": 39.310, + "args": { + "External id": 991522,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941549308.431, "dur": 29.993, + "args": { + "External id": 991523,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941549346.217, "dur": 38.045, + "args": { + "External id": 991524,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941549393.176, "dur": 29.879, + "args": { + "External id": 991525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941549430.856, "dur": 30.702, + "args": { + "External id": 991526,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345941549485.184, "dur": 26.564, + "args": { + "External id": 991527,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941549532.169, "dur": 27.134, + "args": { + "External id": 991528,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941549574.368, "dur": 19.374, + "args": { + "External id": 991529,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941549608.493, "dur": 15.832, + "args": { + "External id": 991530,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345941549637.319, "dur": 18.004, + "args": { + "External id": 991531,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549737.840, "dur": 16.593, + "args": { + "External id": 991532,"Record function id": 0, "Ev Idx": 9515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549741.727, "dur": 11.653, + "args": { + "External id": 991533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549746.150, "dur": 6.174, + "args": { + "External id": 991534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549748.028, "dur": 4.178, + "args": { + "External id": 991535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549758.491, "dur": 5.533, + "args": { + "External id": 991536,"Record function id": 0, "Ev Idx": 9519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549760.087, "dur": 3.415, + "args": { + "External id": 991537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549760.816, "dur": 2.070, + "args": { + "External id": 991538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549761.804, "dur": 0.973, + "args": { + "External id": 991539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549767.306, "dur": 5.153, + "args": { + "External id": 991540,"Record function id": 0, "Ev Idx": 9523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549768.553, "dur": 3.508, + "args": { + "External id": 991541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549769.104, "dur": 2.500, + "args": { + "External id": 991542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549770.131, "dur": 1.366, + "args": { + "External id": 991543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549775.661, "dur": 5.830, + "args": { + "External id": 991544,"Record function id": 0, "Ev Idx": 9527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549777.283, "dur": 3.790, + "args": { + "External id": 991545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549778.324, "dur": 2.158, + "args": { + "External id": 991546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549779.021, "dur": 1.308, + "args": { + "External id": 991547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549784.484, "dur": 8.087, + "args": { + "External id": 991548,"Record function id": 0, "Ev Idx": 9531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549785.890, "dur": 6.264, + "args": { + "External id": 991549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549786.374, "dur": 5.163, + "args": { + "External id": 991550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549787.871, "dur": 3.564, + "args": { + "External id": 991551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549795.691, "dur": 5.655, + "args": { + "External id": 991552,"Record function id": 0, "Ev Idx": 9535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549797.062, "dur": 3.802, + "args": { + "External id": 991553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549797.519, "dur": 2.906, + "args": { + "External id": 991554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549798.922, "dur": 1.429, + "args": { + "External id": 991555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549804.465, "dur": 5.297, + "args": { + "External id": 991556,"Record function id": 0, "Ev Idx": 9539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549805.863, "dur": 3.494, + "args": { + "External id": 991557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549806.296, "dur": 2.619, + "args": { + "External id": 991558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549807.426, "dur": 1.418, + "args": { + "External id": 991559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549812.798, "dur": 4.978, + "args": { + "External id": 991560,"Record function id": 0, "Ev Idx": 9543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549814.144, "dur": 3.209, + "args": { + "External id": 991561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549814.622, "dur": 2.168, + "args": { + "External id": 991562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549815.573, "dur": 1.132, + "args": { + "External id": 991563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549821.505, "dur": 4.891, + "args": { + "External id": 991564,"Record function id": 0, "Ev Idx": 9547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941549822.784, "dur": 3.206, + "args": { + "External id": 991565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549823.237, "dur": 2.213, + "args": { + "External id": 991566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941549824.053, "dur": 1.313, + "args": { + "External id": 991567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941549830.455, "dur": 62985.805, + "args": { + "External id": 991568,"Record function id": 0, "Sequence number": 10552498, "Fwd thread id": 1, "Ev Idx": 9551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941549831.782, "dur": 62975.024, + "args": { + "External id": 991569,"Sequence number": 10552498, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9552 + } + }, + { + "ph": "f", "id": 421, "pid": 2338706, "tid": 2379422, "ts": 6345941549831.782, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 2338706, "tid": 2379422, + "ts": 6345941549861.163, "dur": 38.194, + "args": { + "External id": 991570,"Record function id": 0, "Ev Idx": 9553 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 2338706, "tid": 2379422, + "ts": 6345941549908.000, "dur": 65.961, + "args": { + "External id": 991571,"Record function id": 0, "Ev Idx": 9554 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 2338706, "tid": 2379422, + "ts": 6345941549980.385, "dur": 62819.043, + "args": { + "External id": 991572,"Record function id": 0, "Ev Idx": 9555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941550130.112, "dur": 8.427, + "args": { + "External id": 991573,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941550150.120, "dur": 4.744, + "args": { + "External id": 991574,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941550170.838, "dur": 61685.329, + "args": { + "External id": 991575,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941550185.571, "dur": 61656.919, + "args": { + "External id": 991576,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941550292.305, "dur": 18.892, + "args": { + "External id": 991577,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941550330.489, "dur": 61467.533, + "args": { + "External id": 991578,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941550333.320, "dur": 61463.665, + "args": { + "External id": 991579,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941550337.526, "dur": 8.467, + "args": { + "External id": 991580,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941550352.229, "dur": 61439.827, + "args": { + "External id": 991581,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941611968.202, "dur": 12.289, + "args": { + "External id": 991582,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941611972.025, "dur": 8.117, + "args": { + "External id": 991583,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941612025.613, "dur": 397.373, + "args": { + "External id": 991584,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941612085.660, "dur": 331.974, + "args": { + "External id": 991585,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9568, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941612100.921, "dur": 311.415, + "args": { + "External id": 991586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941612442.735, "dur": 2.429, + "args": { + "External id": 991587,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9570, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941612508.602, "dur": 8.285, + "args": { + "External id": 991588,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941612529.769, "dur": 34.369, + "args": { + "External id": 991589,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941612574.609, "dur": 1.650, + "args": { + "External id": 991590,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941612582.734, "dur": 12.068, + "args": { + "External id": 991591,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941612600.976, "dur": 1.053, + "args": { + "External id": 991592,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941612607.138, "dur": 10.704, + "args": { + "External id": 991593,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941612622.885, "dur": 1.208, + "args": { + "External id": 991594,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941612628.081, "dur": 10.500, + "args": { + "External id": 991595,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941612643.021, "dur": 0.910, + "args": { + "External id": 991596,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941612648.524, "dur": 9.861, + "args": { + "External id": 991597,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941612662.316, "dur": 1.236, + "args": { + "External id": 991598,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941612667.707, "dur": 10.474, + "args": { + "External id": 991599,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941612682.388, "dur": 1.034, + "args": { + "External id": 991600,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941612688.447, "dur": 9.576, + "args": { + "External id": 991601,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941612704.532, "dur": 1.106, + "args": { + "External id": 991602,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941612709.769, "dur": 10.852, + "args": { + "External id": 991603,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941612725.045, "dur": 3.351, + "args": { + "External id": 991604,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941612732.196, "dur": 9.953, + "args": { + "External id": 991605,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941612830.698, "dur": 2961.789, + "args": { + "External id": 991606,"Record function id": 0, "Ev Idx": 9589 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 2338706, "tid": 2379422, + "ts": 6345941612851.530, "dur": 1055.527, + "args": { + "External id": 991607,"Record function id": 0, "Ev Idx": 9590 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338706, "tid": 2379422, + "ts": 6345941612865.526, "dur": 383.303, + "args": { + "External id": 991608,"Record function id": 0, "Ev Idx": 9591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941612948.874, "dur": 4.103, + "args": { + "External id": 991609,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941612956.010, "dur": 0.753, + "args": { + "External id": 991610,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941612959.174, "dur": 1.059, + "args": { + "External id": 991611,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941612961.839, "dur": 0.952, + "args": { + "External id": 991612,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941612965.007, "dur": 1.016, + "args": { + "External id": 991613,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941612967.485, "dur": 0.765, + "args": { + "External id": 991614,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941612969.797, "dur": 0.613, + "args": { + "External id": 991615,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941612972.201, "dur": 3.787, + "args": { + "External id": 991616,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941612979.914, "dur": 0.667, + "args": { + "External id": 991617,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941612981.963, "dur": 0.562, + "args": { + "External id": 991618,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941613001.320, "dur": 211.558, + "args": { + "External id": 991619,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941613038.609, "dur": 169.096, + "args": { + "External id": 991620,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941613090.596, "dur": 15.849, + "args": { + "External id": 991621,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941613110.917, "dur": 66.666, + "args": { + "External id": 991622,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941613113.714, "dur": 63.528, + "args": { + "External id": 991623,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613117.907, "dur": 6.370, + "args": { + "External id": 991624,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941613126.017, "dur": 50.626, + "args": { + "External id": 991625,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9608 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 2338706, "tid": 2379422, + "ts": 6345941613336.245, "dur": 563.447, + "args": { + "External id": 991626,"Record function id": 0, "Ev Idx": 9609 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338706, "tid": 2379422, + "ts": 6345941613353.464, "dur": 534.368, + "args": { + "External id": 991627,"Record function id": 0, "Ev Idx": 9610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941613414.857, "dur": 5.264, + "args": { + "External id": 991628,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941613435.767, "dur": 29.225, + "args": { + "External id": 991629,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613441.112, "dur": 1.895, + "args": { + "External id": 991630,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613444.712, "dur": 0.521, + "args": { + "External id": 991631,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613446.921, "dur": 3.090, + "args": { + "External id": 991632,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613451.540, "dur": 0.309, + "args": { + "External id": 991633,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613452.780, "dur": 0.698, + "args": { + "External id": 991634,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613455.138, "dur": 0.351, + "args": { + "External id": 991635,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613456.938, "dur": 0.427, + "args": { + "External id": 991636,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613458.267, "dur": 0.551, + "args": { + "External id": 991637,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613460.725, "dur": 0.405, + "args": { + "External id": 991638,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941613476.894, "dur": 41.987, + "args": { + "External id": 991639,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345941613550.054, "dur": 115.500, + "args": { + "External id": 991640,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941613564.547, "dur": 2.996, + "args": { + "External id": 991641,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345941613573.039, "dur": 12.535, + "args": { + "External id": 991642,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345941613577.499, "dur": 7.647, + "args": { + "External id": 991643,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613581.376, "dur": 2.601, + "args": { + "External id": 991644,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941613592.286, "dur": 27.024, + "args": { + "External id": 991645,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613594.339, "dur": 0.752, + "args": { + "External id": 991646,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613596.503, "dur": 0.829, + "args": { + "External id": 991647,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613599.329, "dur": 0.552, + "args": { + "External id": 991648,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613601.497, "dur": 0.280, + "args": { + "External id": 991649,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613602.823, "dur": 0.461, + "args": { + "External id": 991650,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613604.782, "dur": 0.395, + "args": { + "External id": 991651,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613606.752, "dur": 0.378, + "args": { + "External id": 991652,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613608.059, "dur": 6.451, + "args": { + "External id": 991653,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941613616.415, "dur": 0.394, + "args": { + "External id": 991654,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941613627.631, "dur": 30.571, + "args": { + "External id": 991655,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941613708.196, "dur": 113.130, + "args": { + "External id": 991656,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941613731.686, "dur": 86.420, + "args": { + "External id": 991657,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9640, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941613740.599, "dur": 73.351, + "args": { + "External id": 991658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941613838.516, "dur": 1.589, + "args": { + "External id": 991659,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9642, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941613914.335, "dur": 1851.994, + "args": { + "External id": 991660,"Sequence number": 10552497, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9643 + } + }, + { + "ph": "f", "id": 422, "pid": 2338706, "tid": 2379422, "ts": 6345941613914.335, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941614042.913, "dur": 145.675, + "args": { + "External id": 991661,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941614237.645, "dur": 43.838, + "args": { + "External id": 991662,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345941614303.088, "dur": 50.787, + "args": { + "External id": 991663,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941614363.957, "dur": 31.316, + "args": { + "External id": 991664,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941614401.753, "dur": 31.660, + "args": { + "External id": 991665,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941614439.601, "dur": 27.131, + "args": { + "External id": 991666,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941614475.369, "dur": 28.668, + "args": { + "External id": 991667,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941614528.753, "dur": 26.798, + "args": { + "External id": 991668,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941614576.542, "dur": 29.820, + "args": { + "External id": 991669,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941614628.017, "dur": 21.440, + "args": { + "External id": 991670,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941614664.641, "dur": 18.098, + "args": { + "External id": 991671,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941614694.391, "dur": 36.235, + "args": { + "External id": 991672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941614734.642, "dur": 33.321, + "args": { + "External id": 991673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345941614797.705, "dur": 302.100, + "args": { + "External id": 991674,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941614877.420, "dur": 6.544, + "args": { + "External id": 991675,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941614886.014, "dur": 2.915, + "args": { + "External id": 991676,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941614890.343, "dur": 4.980, + "args": { + "External id": 991677,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941614896.654, "dur": 2.272, + "args": { + "External id": 991678,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941614942.686, "dur": 5.011, + "args": { + "External id": 991679,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941614944.680, "dur": 2.815, + "args": { + "External id": 991680,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941614949.448, "dur": 30.918, + "args": { + "External id": 991681,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941614954.783, "dur": 1.609, + "args": { + "External id": 991682,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941614982.263, "dur": 1.796, + "args": { + "External id": 991683,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941614983.230, "dur": 0.729, + "args": { + "External id": 991684,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941614985.205, "dur": 15.238, + "args": { + "External id": 991685,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941614987.297, "dur": 0.490, + "args": { + "External id": 991686,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941615162.886, "dur": 36.075, + "args": { + "External id": 991687,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941615219.646, "dur": 20.118, + "args": { + "External id": 991688,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941615252.682, "dur": 52.190, + "args": { + "External id": 991689,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941615312.819, "dur": 41.094, + "args": { + "External id": 991690,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941615363.282, "dur": 23.002, + "args": { + "External id": 991691,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941615395.178, "dur": 46.576, + "args": { + "External id": 991692,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941615457.263, "dur": 35.487, + "args": { + "External id": 991693,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941615500.516, "dur": 34.763, + "args": { + "External id": 991694,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345941615554.899, "dur": 25.419, + "args": { + "External id": 991695,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941615599.378, "dur": 25.604, + "args": { + "External id": 991696,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941615639.698, "dur": 19.321, + "args": { + "External id": 991697,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941615683.498, "dur": 15.892, + "args": { + "External id": 991698,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345941615713.966, "dur": 18.080, + "args": { + "External id": 991699,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615815.432, "dur": 16.187, + "args": { + "External id": 991700,"Record function id": 0, "Ev Idx": 9683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615819.251, "dur": 11.396, + "args": { + "External id": 991701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615823.586, "dur": 6.060, + "args": { + "External id": 991702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615825.374, "dur": 4.146, + "args": { + "External id": 991703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615835.446, "dur": 5.978, + "args": { + "External id": 991704,"Record function id": 0, "Ev Idx": 9687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615837.459, "dur": 3.462, + "args": { + "External id": 991705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615837.988, "dur": 2.322, + "args": { + "External id": 991706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615839.008, "dur": 1.206, + "args": { + "External id": 991707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615844.722, "dur": 5.812, + "args": { + "External id": 991708,"Record function id": 0, "Ev Idx": 9691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615846.309, "dur": 3.769, + "args": { + "External id": 991709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615846.924, "dur": 2.491, + "args": { + "External id": 991710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615848.276, "dur": 1.067, + "args": { + "External id": 991711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615853.719, "dur": 5.089, + "args": { + "External id": 991712,"Record function id": 0, "Ev Idx": 9695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615855.032, "dur": 3.366, + "args": { + "External id": 991713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615855.657, "dur": 2.124, + "args": { + "External id": 991714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615856.572, "dur": 1.124, + "args": { + "External id": 991715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615861.959, "dur": 5.189, + "args": { + "External id": 991716,"Record function id": 0, "Ev Idx": 9699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615863.492, "dur": 3.230, + "args": { + "External id": 991717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615864.234, "dur": 2.055, + "args": { + "External id": 991718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615865.112, "dur": 1.094, + "args": { + "External id": 991719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615870.243, "dur": 7.165, + "args": { + "External id": 991720,"Record function id": 0, "Ev Idx": 9703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615871.577, "dur": 5.405, + "args": { + "External id": 991721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615872.151, "dur": 4.273, + "args": { + "External id": 991722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615873.074, "dur": 3.256, + "args": { + "External id": 991723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615880.592, "dur": 5.550, + "args": { + "External id": 991724,"Record function id": 0, "Ev Idx": 9707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615882.412, "dur": 3.234, + "args": { + "External id": 991725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615883.111, "dur": 2.056, + "args": { + "External id": 991726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615884.087, "dur": 1.005, + "args": { + "External id": 991727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615889.904, "dur": 5.313, + "args": { + "External id": 991728,"Record function id": 0, "Ev Idx": 9711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615891.654, "dur": 3.164, + "args": { + "External id": 991729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615892.112, "dur": 2.273, + "args": { + "External id": 991730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615892.960, "dur": 1.283, + "args": { + "External id": 991731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615898.889, "dur": 5.777, + "args": { + "External id": 991732,"Record function id": 0, "Ev Idx": 9715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941615900.662, "dur": 3.593, + "args": { + "External id": 991733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615901.159, "dur": 2.477, + "args": { + "External id": 991734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941615902.320, "dur": 1.240, + "args": { + "External id": 991735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941615908.959, "dur": 61295.058, + "args": { + "External id": 991736,"Record function id": 0, "Sequence number": 10552496, "Fwd thread id": 1, "Ev Idx": 9719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941615910.412, "dur": 61282.814, + "args": { + "External id": 991737,"Sequence number": 10552496, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9720 + } + }, + { + "ph": "f", "id": 423, "pid": 2338706, "tid": 2379422, "ts": 6345941615910.412, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 2338706, "tid": 2379422, + "ts": 6345941615941.666, "dur": 40.735, + "args": { + "External id": 991738,"Record function id": 0, "Ev Idx": 9721 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 2338706, "tid": 2379422, + "ts": 6345941615991.051, "dur": 121.911, + "args": { + "External id": 991739,"Record function id": 0, "Ev Idx": 9722 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 2338706, "tid": 2379422, + "ts": 6345941616121.123, "dur": 61063.867, + "args": { + "External id": 991740,"Record function id": 0, "Ev Idx": 9723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941616216.131, "dur": 7.588, + "args": { + "External id": 991741,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941616234.520, "dur": 5.039, + "args": { + "External id": 991742,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941616254.394, "dur": 60025.203, + "args": { + "External id": 991743,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941616269.227, "dur": 59997.256, + "args": { + "External id": 991744,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941616446.661, "dur": 20.121, + "args": { + "External id": 991745,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941616487.587, "dur": 59729.819, + "args": { + "External id": 991746,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941616492.872, "dur": 59723.486, + "args": { + "External id": 991747,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941616497.829, "dur": 9.424, + "args": { + "External id": 991748,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941616509.593, "dur": 59701.887, + "args": { + "External id": 991749,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941676394.210, "dur": 14.426, + "args": { + "External id": 991750,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941676399.322, "dur": 8.964, + "args": { + "External id": 991751,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941676439.664, "dur": 310.118, + "args": { + "External id": 991752,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941676470.409, "dur": 274.556, + "args": { + "External id": 991753,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9736, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941676482.347, "dur": 255.781, + "args": { + "External id": 991754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941676769.231, "dur": 2.257, + "args": { + "External id": 991755,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9738, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941676830.023, "dur": 6.229, + "args": { + "External id": 991756,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941676848.191, "dur": 30.946, + "args": { + "External id": 991757,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941676889.411, "dur": 3.585, + "args": { + "External id": 991758,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941676899.192, "dur": 12.506, + "args": { + "External id": 991759,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941676917.401, "dur": 0.864, + "args": { + "External id": 991760,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941676922.405, "dur": 10.614, + "args": { + "External id": 991761,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941676937.884, "dur": 0.744, + "args": { + "External id": 991762,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941676942.348, "dur": 11.070, + "args": { + "External id": 991763,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941676960.842, "dur": 0.780, + "args": { + "External id": 991764,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941676965.486, "dur": 9.705, + "args": { + "External id": 991765,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941676979.434, "dur": 0.890, + "args": { + "External id": 991766,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941676984.195, "dur": 10.604, + "args": { + "External id": 991767,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941676999.010, "dur": 1.119, + "args": { + "External id": 991768,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941677004.102, "dur": 30.756, + "args": { + "External id": 991769,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677044.666, "dur": 1.366, + "args": { + "External id": 991770,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941677051.374, "dur": 46.564, + "args": { + "External id": 991771,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677106.944, "dur": 1.542, + "args": { + "External id": 991772,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941677112.692, "dur": 10.373, + "args": { + "External id": 991773,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941677220.776, "dur": 2935.591, + "args": { + "External id": 991774,"Record function id": 0, "Ev Idx": 9757 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 2338706, "tid": 2379422, + "ts": 6345941677242.866, "dur": 1071.463, + "args": { + "External id": 991775,"Record function id": 0, "Ev Idx": 9758 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338706, "tid": 2379422, + "ts": 6345941677259.467, "dur": 325.040, + "args": { + "External id": 991776,"Record function id": 0, "Ev Idx": 9759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941677350.321, "dur": 6.732, + "args": { + "External id": 991777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941677360.094, "dur": 0.793, + "args": { + "External id": 991778,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941677363.013, "dur": 0.844, + "args": { + "External id": 991779,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941677365.510, "dur": 0.945, + "args": { + "External id": 991780,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941677367.860, "dur": 0.757, + "args": { + "External id": 991781,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941677372.685, "dur": 0.822, + "args": { + "External id": 991782,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941677375.167, "dur": 0.860, + "args": { + "External id": 991783,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941677377.671, "dur": 2.094, + "args": { + "External id": 991784,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941677381.931, "dur": 2.705, + "args": { + "External id": 991785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941677387.912, "dur": 0.593, + "args": { + "External id": 991786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941677411.021, "dur": 142.903, + "args": { + "External id": 991787,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941677427.541, "dur": 121.559, + "args": { + "External id": 991788,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941677442.344, "dur": 13.762, + "args": { + "External id": 991789,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941677460.088, "dur": 59.774, + "args": { + "External id": 991790,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941677463.070, "dur": 56.464, + "args": { + "External id": 991791,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677467.230, "dur": 5.349, + "args": { + "External id": 991792,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941677474.667, "dur": 44.366, + "args": { + "External id": 991793,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9776 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 2338706, "tid": 2379422, + "ts": 6345941677671.858, "dur": 634.411, + "args": { + "External id": 991794,"Record function id": 0, "Ev Idx": 9777 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338706, "tid": 2379422, + "ts": 6345941677688.854, "dur": 602.558, + "args": { + "External id": 991795,"Record function id": 0, "Ev Idx": 9778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941677749.465, "dur": 4.666, + "args": { + "External id": 991796,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941677770.397, "dur": 28.501, + "args": { + "External id": 991797,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677775.603, "dur": 1.465, + "args": { + "External id": 991798,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677779.023, "dur": 0.605, + "args": { + "External id": 991799,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677781.386, "dur": 0.446, + "args": { + "External id": 991800,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677783.474, "dur": 2.775, + "args": { + "External id": 991801,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677787.840, "dur": 0.487, + "args": { + "External id": 991802,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677789.877, "dur": 0.396, + "args": { + "External id": 991803,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677791.848, "dur": 0.558, + "args": { + "External id": 991804,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677793.308, "dur": 0.320, + "args": { + "External id": 991805,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677795.095, "dur": 0.393, + "args": { + "External id": 991806,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941677810.475, "dur": 41.563, + "args": { + "External id": 991807,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345941677882.075, "dur": 108.412, + "args": { + "External id": 991808,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941677891.904, "dur": 3.319, + "args": { + "External id": 991809,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345941677900.502, "dur": 9.939, + "args": { + "External id": 991810,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345941677904.869, "dur": 5.135, + "args": { + "External id": 991811,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677908.563, "dur": 0.330, + "args": { + "External id": 991812,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941677916.872, "dur": 25.426, + "args": { + "External id": 991813,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677918.983, "dur": 3.030, + "args": { + "External id": 991814,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677923.240, "dur": 0.416, + "args": { + "External id": 991815,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677926.084, "dur": 0.462, + "args": { + "External id": 991816,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677928.168, "dur": 0.350, + "args": { + "External id": 991817,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677929.430, "dur": 0.398, + "args": { + "External id": 991818,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677931.387, "dur": 0.395, + "args": { + "External id": 991819,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677933.582, "dur": 0.301, + "args": { + "External id": 991820,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677934.878, "dur": 0.456, + "args": { + "External id": 991821,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941677937.287, "dur": 2.572, + "args": { + "External id": 991822,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941677951.915, "dur": 31.236, + "args": { + "External id": 991823,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941678093.091, "dur": 122.863, + "args": { + "External id": 991824,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941678119.764, "dur": 92.469, + "args": { + "External id": 991825,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9808, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941678130.434, "dur": 77.745, + "args": { + "External id": 991826,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941678233.364, "dur": 1.779, + "args": { + "External id": 991827,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9810, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941678321.580, "dur": 1806.659, + "args": { + "External id": 991828,"Sequence number": 10552495, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9811 + } + }, + { + "ph": "f", "id": 424, "pid": 2338706, "tid": 2379422, "ts": 6345941678321.580, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941678433.975, "dur": 105.069, + "args": { + "External id": 991829,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941678580.789, "dur": 44.306, + "args": { + "External id": 991830,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345941678644.458, "dur": 48.863, + "args": { + "External id": 991831,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941678703.739, "dur": 30.552, + "args": { + "External id": 991832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941678741.151, "dur": 33.465, + "args": { + "External id": 991833,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941678780.897, "dur": 28.396, + "args": { + "External id": 991834,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941678818.241, "dur": 28.874, + "args": { + "External id": 991835,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941678873.729, "dur": 22.802, + "args": { + "External id": 991836,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941678917.116, "dur": 27.864, + "args": { + "External id": 991837,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941678967.182, "dur": 19.576, + "args": { + "External id": 991838,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941678999.368, "dur": 34.984, + "args": { + "External id": 991839,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941679048.645, "dur": 80.487, + "args": { + "External id": 991840,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941679135.663, "dur": 35.282, + "args": { + "External id": 991841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345941679202.667, "dur": 261.310, + "args": { + "External id": 991842,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941679282.548, "dur": 6.252, + "args": { + "External id": 991843,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941679291.051, "dur": 2.772, + "args": { + "External id": 991844,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941679295.174, "dur": 2.592, + "args": { + "External id": 991845,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941679299.099, "dur": 4.673, + "args": { + "External id": 991846,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941679351.623, "dur": 5.340, + "args": { + "External id": 991847,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941679353.724, "dur": 3.008, + "args": { + "External id": 991848,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941679358.904, "dur": 33.763, + "args": { + "External id": 991849,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941679364.942, "dur": 1.664, + "args": { + "External id": 991850,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941679394.510, "dur": 6.570, + "args": { + "External id": 991851,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941679400.126, "dur": 0.860, + "args": { + "External id": 991852,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941679402.228, "dur": 18.661, + "args": { + "External id": 991853,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941679404.325, "dur": 0.615, + "args": { + "External id": 991854,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941679502.387, "dur": 30.523, + "args": { + "External id": 991855,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941679551.331, "dur": 20.856, + "args": { + "External id": 991856,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941679580.749, "dur": 43.110, + "args": { + "External id": 991857,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941679631.705, "dur": 40.255, + "args": { + "External id": 991858,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941679681.444, "dur": 22.323, + "args": { + "External id": 991859,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941679712.362, "dur": 32.930, + "args": { + "External id": 991860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941679752.715, "dur": 28.786, + "args": { + "External id": 991861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941679789.574, "dur": 31.911, + "args": { + "External id": 991862,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345941679853.448, "dur": 33.433, + "args": { + "External id": 991863,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941679905.662, "dur": 27.080, + "args": { + "External id": 991864,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941679946.685, "dur": 19.147, + "args": { + "External id": 991865,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941679983.769, "dur": 15.262, + "args": { + "External id": 991866,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345941680031.739, "dur": 56.080, + "args": { + "External id": 991867,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680180.505, "dur": 17.863, + "args": { + "External id": 991868,"Record function id": 0, "Ev Idx": 9851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680184.739, "dur": 12.653, + "args": { + "External id": 991869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680189.767, "dur": 6.615, + "args": { + "External id": 991870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680191.499, "dur": 4.705, + "args": { + "External id": 991871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680202.587, "dur": 6.849, + "args": { + "External id": 991872,"Record function id": 0, "Ev Idx": 9855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680204.650, "dur": 4.281, + "args": { + "External id": 991873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680205.567, "dur": 2.894, + "args": { + "External id": 991874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680207.035, "dur": 1.328, + "args": { + "External id": 991875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680212.809, "dur": 6.060, + "args": { + "External id": 991876,"Record function id": 0, "Ev Idx": 9859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680214.719, "dur": 3.714, + "args": { + "External id": 991877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680215.214, "dur": 2.600, + "args": { + "External id": 991878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680216.278, "dur": 1.454, + "args": { + "External id": 991879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680222.195, "dur": 5.207, + "args": { + "External id": 991880,"Record function id": 0, "Ev Idx": 9863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680223.729, "dur": 3.230, + "args": { + "External id": 991881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680224.209, "dur": 2.354, + "args": { + "External id": 991882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680225.553, "dur": 0.935, + "args": { + "External id": 991883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680230.570, "dur": 5.598, + "args": { + "External id": 991884,"Record function id": 0, "Ev Idx": 9867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680232.142, "dur": 3.606, + "args": { + "External id": 991885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680232.646, "dur": 2.247, + "args": { + "External id": 991886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680233.544, "dur": 1.265, + "args": { + "External id": 991887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680239.562, "dur": 7.660, + "args": { + "External id": 991888,"Record function id": 0, "Ev Idx": 9871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680241.074, "dur": 5.688, + "args": { + "External id": 991889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680241.516, "dur": 4.436, + "args": { + "External id": 991890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680242.659, "dur": 3.195, + "args": { + "External id": 991891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680250.429, "dur": 4.928, + "args": { + "External id": 991892,"Record function id": 0, "Ev Idx": 9875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680251.742, "dur": 3.175, + "args": { + "External id": 991893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680252.237, "dur": 2.237, + "args": { + "External id": 991894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680253.212, "dur": 1.177, + "args": { + "External id": 991895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680258.565, "dur": 5.298, + "args": { + "External id": 991896,"Record function id": 0, "Ev Idx": 9879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680260.243, "dur": 3.195, + "args": { + "External id": 991897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680260.900, "dur": 1.944, + "args": { + "External id": 991898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680261.613, "dur": 1.139, + "args": { + "External id": 991899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680267.396, "dur": 4.139, + "args": { + "External id": 991900,"Record function id": 0, "Ev Idx": 9883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941680268.494, "dur": 2.554, + "args": { + "External id": 991901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680268.981, "dur": 1.401, + "args": { + "External id": 991902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941680269.319, "dur": 0.980, + "args": { + "External id": 991903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941680275.651, "dur": 61184.638, + "args": { + "External id": 991904,"Record function id": 0, "Sequence number": 10552494, "Fwd thread id": 1, "Ev Idx": 9887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941680276.956, "dur": 61174.514, + "args": { + "External id": 991905,"Sequence number": 10552494, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9888 + } + }, + { + "ph": "f", "id": 425, "pid": 2338706, "tid": 2379422, "ts": 6345941680276.956, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 2338706, "tid": 2379422, + "ts": 6345941680307.958, "dur": 40.286, + "args": { + "External id": 991906,"Record function id": 0, "Ev Idx": 9889 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 2338706, "tid": 2379422, + "ts": 6345941680357.076, "dur": 67.835, + "args": { + "External id": 991907,"Record function id": 0, "Ev Idx": 9890 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 2338706, "tid": 2379422, + "ts": 6345941680431.503, "dur": 61011.290, + "args": { + "External id": 991908,"Record function id": 0, "Ev Idx": 9891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941680522.361, "dur": 7.529, + "args": { + "External id": 991909,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941680539.389, "dur": 4.535, + "args": { + "External id": 991910,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941680559.101, "dur": 59867.208, + "args": { + "External id": 991911,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941680572.700, "dur": 59839.570, + "args": { + "External id": 991912,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941680742.757, "dur": 19.362, + "args": { + "External id": 991913,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941680781.469, "dur": 59579.265, + "args": { + "External id": 991914,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941680784.244, "dur": 59575.353, + "args": { + "External id": 991915,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941680789.107, "dur": 9.238, + "args": { + "External id": 991916,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941680800.825, "dur": 59553.247, + "args": { + "External id": 991917,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941740542.673, "dur": 12.337, + "args": { + "External id": 991918,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941740546.193, "dur": 8.500, + "args": { + "External id": 991919,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941740590.086, "dur": 408.808, + "args": { + "External id": 991920,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941740623.196, "dur": 370.555, + "args": { + "External id": 991921,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9904, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941740634.863, "dur": 353.342, + "args": { + "External id": 991922,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941741040.863, "dur": 3.399, + "args": { + "External id": 991923,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9906, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941741145.224, "dur": 7.344, + "args": { + "External id": 991924,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941741165.079, "dur": 37.880, + "args": { + "External id": 991925,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941741214.098, "dur": 3.812, + "args": { + "External id": 991926,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941741223.778, "dur": 13.181, + "args": { + "External id": 991927,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941741243.112, "dur": 0.947, + "args": { + "External id": 991928,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941741248.386, "dur": 10.787, + "args": { + "External id": 991929,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941741264.709, "dur": 1.054, + "args": { + "External id": 991930,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941741269.974, "dur": 10.836, + "args": { + "External id": 991931,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941741285.935, "dur": 0.901, + "args": { + "External id": 991932,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941741290.389, "dur": 9.923, + "args": { + "External id": 991933,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941741305.316, "dur": 1.063, + "args": { + "External id": 991934,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941741310.599, "dur": 10.313, + "args": { + "External id": 991935,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941741325.883, "dur": 0.929, + "args": { + "External id": 991936,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941741330.764, "dur": 10.125, + "args": { + "External id": 991937,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941741347.774, "dur": 0.861, + "args": { + "External id": 991938,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941741352.333, "dur": 10.548, + "args": { + "External id": 991939,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941741367.339, "dur": 1.035, + "args": { + "External id": 991940,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941741372.201, "dur": 10.822, + "args": { + "External id": 991941,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941741477.419, "dur": 3009.389, + "args": { + "External id": 991942,"Record function id": 0, "Ev Idx": 9925 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 2338706, "tid": 2379422, + "ts": 6345941741500.138, "dur": 1095.416, + "args": { + "External id": 991943,"Record function id": 0, "Ev Idx": 9926 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338706, "tid": 2379422, + "ts": 6345941741516.661, "dur": 324.991, + "args": { + "External id": 991944,"Record function id": 0, "Ev Idx": 9927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941741603.473, "dur": 6.413, + "args": { + "External id": 991945,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941741613.251, "dur": 0.945, + "args": { + "External id": 991946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941741616.102, "dur": 0.916, + "args": { + "External id": 991947,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941741618.624, "dur": 0.796, + "args": { + "External id": 991948,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941741620.878, "dur": 0.819, + "args": { + "External id": 991949,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941741625.637, "dur": 1.038, + "args": { + "External id": 991950,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941741628.262, "dur": 0.654, + "args": { + "External id": 991951,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941741630.479, "dur": 2.275, + "args": { + "External id": 991952,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941741634.339, "dur": 2.439, + "args": { + "External id": 991953,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941741640.233, "dur": 0.809, + "args": { + "External id": 991954,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941741659.600, "dur": 151.131, + "args": { + "External id": 991955,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941741677.705, "dur": 128.547, + "args": { + "External id": 991956,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941741695.250, "dur": 14.559, + "args": { + "External id": 991957,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941741713.952, "dur": 62.962, + "args": { + "External id": 991958,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941741716.576, "dur": 59.937, + "args": { + "External id": 991959,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941741720.798, "dur": 5.716, + "args": { + "External id": 991960,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941741728.603, "dur": 47.248, + "args": { + "External id": 991961,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9944 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 2338706, "tid": 2379422, + "ts": 6345941741926.919, "dur": 660.903, + "args": { + "External id": 991962,"Record function id": 0, "Ev Idx": 9945 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338706, "tid": 2379422, + "ts": 6345941741944.867, "dur": 629.629, + "args": { + "External id": 991963,"Record function id": 0, "Ev Idx": 9946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941742003.980, "dur": 26.302, + "args": { + "External id": 991964,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941742048.705, "dur": 66.422, + "args": { + "External id": 991965,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742088.351, "dur": 2.132, + "args": { + "External id": 991966,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742092.674, "dur": 0.811, + "args": { + "External id": 991967,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742095.188, "dur": 0.529, + "args": { + "External id": 991968,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742097.618, "dur": 2.747, + "args": { + "External id": 991969,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742101.854, "dur": 0.541, + "args": { + "External id": 991970,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742103.995, "dur": 0.449, + "args": { + "External id": 991971,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742106.011, "dur": 0.440, + "args": { + "External id": 991972,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742107.844, "dur": 0.485, + "args": { + "External id": 991973,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742110.372, "dur": 0.351, + "args": { + "External id": 991974,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941742127.525, "dur": 49.901, + "args": { + "External id": 991975,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345941742212.425, "dur": 119.960, + "args": { + "External id": 991976,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941742224.434, "dur": 4.721, + "args": { + "External id": 991977,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345941742235.158, "dur": 11.000, + "args": { + "External id": 991978,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345941742239.845, "dur": 5.888, + "args": { + "External id": 991979,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742243.976, "dur": 0.636, + "args": { + "External id": 991980,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941742256.018, "dur": 25.529, + "args": { + "External id": 991981,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742258.318, "dur": 2.454, + "args": { + "External id": 991982,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742262.638, "dur": 0.667, + "args": { + "External id": 991983,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742264.909, "dur": 0.495, + "args": { + "External id": 991984,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742266.552, "dur": 0.656, + "args": { + "External id": 991985,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742268.638, "dur": 0.401, + "args": { + "External id": 991986,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742270.712, "dur": 0.321, + "args": { + "External id": 991987,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742272.041, "dur": 0.436, + "args": { + "External id": 991988,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742274.057, "dur": 0.451, + "args": { + "External id": 991989,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941742276.460, "dur": 2.641, + "args": { + "External id": 991990,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941742292.138, "dur": 32.730, + "args": { + "External id": 991991,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941742382.525, "dur": 119.281, + "args": { + "External id": 991992,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941742410.714, "dur": 87.567, + "args": { + "External id": 991993,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9976, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941742420.788, "dur": 73.222, + "args": { + "External id": 991994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941742519.654, "dur": 1.774, + "args": { + "External id": 991995,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9978, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941742602.770, "dur": 1858.955, + "args": { + "External id": 991996,"Sequence number": 10552493, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9979 + } + }, + { + "ph": "f", "id": 426, "pid": 2338706, "tid": 2379422, "ts": 6345941742602.770, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941742713.582, "dur": 103.530, + "args": { + "External id": 991997,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941742858.490, "dur": 42.703, + "args": { + "External id": 991998,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345941742920.317, "dur": 50.041, + "args": { + "External id": 991999,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941742980.860, "dur": 53.330, + "args": { + "External id": 992000,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941743043.700, "dur": 77.136, + "args": { + "External id": 992001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941743132.468, "dur": 30.487, + "args": { + "External id": 992002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941743172.803, "dur": 30.431, + "args": { + "External id": 992003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941743233.679, "dur": 26.145, + "args": { + "External id": 992004,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941743281.582, "dur": 31.800, + "args": { + "External id": 992005,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941743335.486, "dur": 21.720, + "args": { + "External id": 992006,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941743371.728, "dur": 16.514, + "args": { + "External id": 992007,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941743399.189, "dur": 37.559, + "args": { + "External id": 992008,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941743440.563, "dur": 34.464, + "args": { + "External id": 992009,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345941743504.073, "dur": 269.297, + "args": { + "External id": 992010,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941743584.697, "dur": 6.778, + "args": { + "External id": 992011,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941743593.559, "dur": 2.836, + "args": { + "External id": 992012,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941743597.713, "dur": 2.759, + "args": { + "External id": 992013,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941743601.779, "dur": 4.712, + "args": { + "External id": 992014,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941743652.615, "dur": 5.882, + "args": { + "External id": 992015,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941743654.997, "dur": 3.341, + "args": { + "External id": 992016,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941743678.607, "dur": 30.561, + "args": { + "External id": 992017,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941743684.153, "dur": 1.685, + "args": { + "External id": 992018,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941743710.700, "dur": 1.906, + "args": { + "External id": 992019,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941743711.789, "dur": 0.740, + "args": { + "External id": 992020,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941743713.372, "dur": 14.370, + "args": { + "External id": 992021,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941743715.513, "dur": 0.609, + "args": { + "External id": 992022,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941743811.673, "dur": 27.438, + "args": { + "External id": 992023,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941743856.977, "dur": 18.072, + "args": { + "External id": 992024,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941743883.823, "dur": 44.620, + "args": { + "External id": 992025,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941743936.369, "dur": 41.020, + "args": { + "External id": 992026,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941743986.030, "dur": 42.288, + "args": { + "External id": 992027,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941744039.669, "dur": 90.175, + "args": { + "External id": 992028,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941744153.651, "dur": 38.146, + "args": { + "External id": 992029,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941744200.928, "dur": 31.018, + "args": { + "External id": 992030,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345941744259.444, "dur": 26.552, + "args": { + "External id": 992031,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941744302.774, "dur": 25.647, + "args": { + "External id": 992032,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941744343.305, "dur": 20.974, + "args": { + "External id": 992033,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941744380.062, "dur": 13.534, + "args": { + "External id": 992034,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345941744406.451, "dur": 20.827, + "args": { + "External id": 992035,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744510.071, "dur": 16.767, + "args": { + "External id": 992036,"Record function id": 0, "Ev Idx": 10019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744514.033, "dur": 11.882, + "args": { + "External id": 992037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744518.624, "dur": 6.367, + "args": { + "External id": 992038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744520.590, "dur": 4.269, + "args": { + "External id": 992039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744530.850, "dur": 6.162, + "args": { + "External id": 992040,"Record function id": 0, "Ev Idx": 10023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744532.629, "dur": 3.887, + "args": { + "External id": 992041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744533.499, "dur": 2.520, + "args": { + "External id": 992042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744534.621, "dur": 1.241, + "args": { + "External id": 992043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744540.325, "dur": 5.671, + "args": { + "External id": 992044,"Record function id": 0, "Ev Idx": 10027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744541.877, "dur": 3.648, + "args": { + "External id": 992045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744542.583, "dur": 2.396, + "args": { + "External id": 992046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744543.566, "dur": 1.340, + "args": { + "External id": 992047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744549.121, "dur": 5.710, + "args": { + "External id": 992048,"Record function id": 0, "Ev Idx": 10031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744550.799, "dur": 3.598, + "args": { + "External id": 992049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744551.240, "dur": 2.452, + "args": { + "External id": 992050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744552.548, "dur": 1.058, + "args": { + "External id": 992051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744557.981, "dur": 5.803, + "args": { + "External id": 992052,"Record function id": 0, "Ev Idx": 10035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744559.545, "dur": 3.750, + "args": { + "External id": 992053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744560.269, "dur": 2.425, + "args": { + "External id": 992054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744561.245, "dur": 1.373, + "args": { + "External id": 992055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744566.986, "dur": 8.100, + "args": { + "External id": 992056,"Record function id": 0, "Ev Idx": 10039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744568.575, "dur": 6.100, + "args": { + "External id": 992057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744569.256, "dur": 4.889, + "args": { + "External id": 992058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744570.196, "dur": 3.855, + "args": { + "External id": 992059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744578.436, "dur": 5.940, + "args": { + "External id": 992060,"Record function id": 0, "Ev Idx": 10043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744580.146, "dur": 3.810, + "args": { + "External id": 992061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744580.949, "dur": 2.576, + "args": { + "External id": 992062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744581.938, "dur": 1.502, + "args": { + "External id": 992063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744587.505, "dur": 6.218, + "args": { + "External id": 992064,"Record function id": 0, "Ev Idx": 10047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744589.173, "dur": 4.138, + "args": { + "External id": 992065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744589.872, "dur": 2.770, + "args": { + "External id": 992066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744591.125, "dur": 1.442, + "args": { + "External id": 992067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744596.764, "dur": 5.675, + "args": { + "External id": 992068,"Record function id": 0, "Ev Idx": 10051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941744598.945, "dur": 3.034, + "args": { + "External id": 992069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744599.474, "dur": 2.043, + "args": { + "External id": 992070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941744600.371, "dur": 1.069, + "args": { + "External id": 992071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941744606.975, "dur": 67673.240, + "args": { + "External id": 992072,"Record function id": 0, "Sequence number": 10552492, "Fwd thread id": 1, "Ev Idx": 10055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941744608.387, "dur": 67662.416, + "args": { + "External id": 992073,"Sequence number": 10552492, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10056 + } + }, + { + "ph": "f", "id": 427, "pid": 2338706, "tid": 2379422, "ts": 6345941744608.387, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 2338706, "tid": 2379422, + "ts": 6345941744638.153, "dur": 42.021, + "args": { + "External id": 992074,"Record function id": 0, "Ev Idx": 10057 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 2338706, "tid": 2379422, + "ts": 6345941744688.852, "dur": 67.951, + "args": { + "External id": 992075,"Record function id": 0, "Ev Idx": 10058 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 2338706, "tid": 2379422, + "ts": 6345941744763.396, "dur": 67498.956, + "args": { + "External id": 992076,"Record function id": 0, "Ev Idx": 10059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941744854.741, "dur": 6.519, + "args": { + "External id": 992077,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941744871.371, "dur": 4.685, + "args": { + "External id": 992078,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941744891.392, "dur": 66371.348, + "args": { + "External id": 992079,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941744905.882, "dur": 66343.760, + "args": { + "External id": 992080,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941745037.792, "dur": 56.630, + "args": { + "External id": 992081,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941745117.968, "dur": 66088.847, + "args": { + "External id": 992082,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941745121.265, "dur": 66084.626, + "args": { + "External id": 992083,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941745126.532, "dur": 11.779, + "args": { + "External id": 992084,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941745142.706, "dur": 66058.401, + "args": { + "External id": 992085,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941811374.794, "dur": 12.326, + "args": { + "External id": 992086,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941811378.417, "dur": 8.348, + "args": { + "External id": 992087,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941811419.707, "dur": 372.577, + "args": { + "External id": 992088,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941811453.254, "dur": 334.286, + "args": { + "External id": 992089,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10072, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941811465.784, "dur": 316.262, + "args": { + "External id": 992090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941811836.024, "dur": 2.779, + "args": { + "External id": 992091,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10074, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941811898.809, "dur": 8.782, + "args": { + "External id": 992092,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941811921.291, "dur": 35.500, + "args": { + "External id": 992093,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941811967.333, "dur": 1.402, + "args": { + "External id": 992094,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941811975.051, "dur": 11.898, + "args": { + "External id": 992095,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941811992.700, "dur": 1.264, + "args": { + "External id": 992096,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941811998.595, "dur": 30.537, + "args": { + "External id": 992097,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812037.770, "dur": 1.763, + "args": { + "External id": 992098,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941812044.318, "dur": 44.196, + "args": { + "External id": 992099,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812098.048, "dur": 1.531, + "args": { + "External id": 992100,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941812104.245, "dur": 12.275, + "args": { + "External id": 992101,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812120.985, "dur": 1.153, + "args": { + "External id": 992102,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941812126.112, "dur": 10.989, + "args": { + "External id": 992103,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812141.759, "dur": 1.047, + "args": { + "External id": 992104,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941812146.453, "dur": 9.196, + "args": { + "External id": 992105,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812159.898, "dur": 1.024, + "args": { + "External id": 992106,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941812165.322, "dur": 10.570, + "args": { + "External id": 992107,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812182.420, "dur": 2.709, + "args": { + "External id": 992108,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941812188.631, "dur": 10.234, + "args": { + "External id": 992109,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941812298.757, "dur": 2943.216, + "args": { + "External id": 992110,"Record function id": 0, "Ev Idx": 10093 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 2338706, "tid": 2379422, + "ts": 6345941812321.684, "dur": 1053.589, + "args": { + "External id": 992111,"Record function id": 0, "Ev Idx": 10094 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338706, "tid": 2379422, + "ts": 6345941812338.198, "dur": 316.741, + "args": { + "External id": 992112,"Record function id": 0, "Ev Idx": 10095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941812424.178, "dur": 4.415, + "args": { + "External id": 992113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941812431.679, "dur": 1.198, + "args": { + "External id": 992114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941812434.986, "dur": 1.335, + "args": { + "External id": 992115,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941812438.173, "dur": 1.094, + "args": { + "External id": 992116,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941812441.037, "dur": 1.032, + "args": { + "External id": 992117,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941812443.383, "dur": 1.070, + "args": { + "External id": 992118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941812445.826, "dur": 0.845, + "args": { + "External id": 992119,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941812448.809, "dur": 4.231, + "args": { + "External id": 992120,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941812456.858, "dur": 0.799, + "args": { + "External id": 992121,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941812459.139, "dur": 0.565, + "args": { + "External id": 992122,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941812477.629, "dur": 149.105, + "args": { + "External id": 992123,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941812495.356, "dur": 126.919, + "args": { + "External id": 992124,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941812511.279, "dur": 16.226, + "args": { + "External id": 992125,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941812531.422, "dur": 65.514, + "args": { + "External id": 992126,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941812534.472, "dur": 62.143, + "args": { + "External id": 992127,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812538.756, "dur": 6.093, + "args": { + "External id": 992128,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941812548.717, "dur": 47.089, + "args": { + "External id": 992129,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10112 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 2338706, "tid": 2379422, + "ts": 6345941812740.300, "dur": 627.662, + "args": { + "External id": 992130,"Record function id": 0, "Ev Idx": 10113 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338706, "tid": 2379422, + "ts": 6345941812756.065, "dur": 598.830, + "args": { + "External id": 992131,"Record function id": 0, "Ev Idx": 10114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941812813.367, "dur": 6.261, + "args": { + "External id": 992132,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941812835.059, "dur": 30.028, + "args": { + "External id": 992133,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812840.639, "dur": 1.342, + "args": { + "External id": 992134,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812843.969, "dur": 0.619, + "args": { + "External id": 992135,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812846.480, "dur": 2.805, + "args": { + "External id": 992136,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812851.159, "dur": 0.282, + "args": { + "External id": 992137,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812852.502, "dur": 0.720, + "args": { + "External id": 992138,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812854.794, "dur": 0.440, + "args": { + "External id": 992139,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812856.988, "dur": 0.390, + "args": { + "External id": 992140,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812858.581, "dur": 0.521, + "args": { + "External id": 992141,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812860.941, "dur": 0.650, + "args": { + "External id": 992142,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941812875.038, "dur": 41.763, + "args": { + "External id": 992143,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345941812946.730, "dur": 171.554, + "args": { + "External id": 992144,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941812956.410, "dur": 2.781, + "args": { + "External id": 992145,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345941812964.336, "dur": 12.771, + "args": { + "External id": 992146,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345941812968.638, "dur": 8.062, + "args": { + "External id": 992147,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812972.278, "dur": 3.251, + "args": { + "External id": 992148,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941812983.985, "dur": 44.044, + "args": { + "External id": 992149,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812986.078, "dur": 0.609, + "args": { + "External id": 992150,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812988.146, "dur": 0.776, + "args": { + "External id": 992151,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812991.090, "dur": 0.741, + "args": { + "External id": 992152,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812993.897, "dur": 0.740, + "args": { + "External id": 992153,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812995.796, "dur": 0.673, + "args": { + "External id": 992154,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941812997.843, "dur": 0.551, + "args": { + "External id": 992155,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941813000.074, "dur": 0.551, + "args": { + "External id": 992156,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941813001.953, "dur": 3.098, + "args": { + "External id": 992157,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941813006.849, "dur": 17.557, + "args": { + "External id": 992158,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941813040.727, "dur": 67.677, + "args": { + "External id": 992159,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941813168.152, "dur": 114.449, + "args": { + "External id": 992160,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941813192.642, "dur": 86.377, + "args": { + "External id": 992161,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10144, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941813203.101, "dur": 71.880, + "args": { + "External id": 992162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941813299.732, "dur": 1.916, + "args": { + "External id": 992163,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10146, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941813382.902, "dur": 1836.112, + "args": { + "External id": 992164,"Sequence number": 10552491, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10147 + } + }, + { + "ph": "f", "id": 428, "pid": 2338706, "tid": 2379422, "ts": 6345941813382.902, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941813496.309, "dur": 102.957, + "args": { + "External id": 992165,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941813643.976, "dur": 41.810, + "args": { + "External id": 992166,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345941813705.263, "dur": 48.240, + "args": { + "External id": 992167,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941813763.452, "dur": 31.800, + "args": { + "External id": 992168,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941813801.384, "dur": 33.033, + "args": { + "External id": 992169,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941813840.869, "dur": 27.323, + "args": { + "External id": 992170,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941813877.157, "dur": 28.316, + "args": { + "External id": 992171,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941813931.505, "dur": 23.846, + "args": { + "External id": 992172,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941813976.117, "dur": 49.037, + "args": { + "External id": 992173,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941814088.237, "dur": 29.850, + "args": { + "External id": 992174,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941814135.877, "dur": 17.602, + "args": { + "External id": 992175,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941814169.430, "dur": 44.075, + "args": { + "External id": 992176,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941814218.043, "dur": 35.427, + "args": { + "External id": 992177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345941814302.953, "dur": 257.641, + "args": { + "External id": 992178,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941814387.247, "dur": 6.596, + "args": { + "External id": 992179,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941814396.345, "dur": 3.241, + "args": { + "External id": 992180,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941814401.505, "dur": 4.830, + "args": { + "External id": 992181,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941814407.874, "dur": 2.885, + "args": { + "External id": 992182,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941814459.214, "dur": 5.656, + "args": { + "External id": 992183,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941814461.763, "dur": 2.937, + "args": { + "External id": 992184,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941814466.799, "dur": 31.424, + "args": { + "External id": 992185,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941814472.234, "dur": 1.904, + "args": { + "External id": 992186,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941814499.995, "dur": 2.265, + "args": { + "External id": 992187,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941814501.470, "dur": 0.700, + "args": { + "External id": 992188,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941814503.541, "dur": 14.660, + "args": { + "External id": 992189,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941814505.570, "dur": 0.756, + "args": { + "External id": 992190,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941814597.395, "dur": 28.005, + "args": { + "External id": 992191,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941814643.324, "dur": 17.834, + "args": { + "External id": 992192,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941814670.339, "dur": 40.475, + "args": { + "External id": 992193,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941814718.899, "dur": 38.700, + "args": { + "External id": 992194,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941814766.351, "dur": 22.640, + "args": { + "External id": 992195,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941814798.050, "dur": 33.380, + "args": { + "External id": 992196,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941814839.414, "dur": 28.269, + "args": { + "External id": 992197,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941814889.365, "dur": 41.308, + "args": { + "External id": 992198,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345941814951.252, "dur": 24.554, + "args": { + "External id": 992199,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941814993.636, "dur": 49.738, + "args": { + "External id": 992200,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941815097.711, "dur": 21.479, + "args": { + "External id": 992201,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941815138.102, "dur": 14.865, + "args": { + "External id": 992202,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345941815166.146, "dur": 17.628, + "args": { + "External id": 992203,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815264.871, "dur": 16.857, + "args": { + "External id": 992204,"Record function id": 0, "Ev Idx": 10187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815269.017, "dur": 11.819, + "args": { + "External id": 992205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815273.725, "dur": 6.033, + "args": { + "External id": 992206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815275.555, "dur": 4.090, + "args": { + "External id": 992207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815285.886, "dur": 6.692, + "args": { + "External id": 992208,"Record function id": 0, "Ev Idx": 10191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815287.968, "dur": 4.104, + "args": { + "External id": 992209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815288.892, "dur": 2.481, + "args": { + "External id": 992210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815290.135, "dur": 1.133, + "args": { + "External id": 992211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815295.934, "dur": 5.582, + "args": { + "External id": 992212,"Record function id": 0, "Ev Idx": 10195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815297.636, "dur": 3.399, + "args": { + "External id": 992213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815298.361, "dur": 2.125, + "args": { + "External id": 992214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815299.050, "dur": 1.353, + "args": { + "External id": 992215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815304.739, "dur": 5.183, + "args": { + "External id": 992216,"Record function id": 0, "Ev Idx": 10199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815306.152, "dur": 3.344, + "args": { + "External id": 992217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815306.885, "dur": 2.173, + "args": { + "External id": 992218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815307.761, "dur": 1.214, + "args": { + "External id": 992219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815313.035, "dur": 5.741, + "args": { + "External id": 992220,"Record function id": 0, "Ev Idx": 10203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815314.767, "dur": 3.590, + "args": { + "External id": 992221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815315.550, "dur": 2.185, + "args": { + "External id": 992222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815316.727, "dur": 0.923, + "args": { + "External id": 992223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815322.139, "dur": 6.529, + "args": { + "External id": 992224,"Record function id": 0, "Ev Idx": 10207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815324.119, "dur": 4.152, + "args": { + "External id": 992225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815324.855, "dur": 2.746, + "args": { + "External id": 992226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815326.056, "dur": 1.465, + "args": { + "External id": 992227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815332.017, "dur": 8.487, + "args": { + "External id": 992228,"Record function id": 0, "Ev Idx": 10211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815333.595, "dur": 6.360, + "args": { + "External id": 992229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815334.399, "dur": 5.043, + "args": { + "External id": 992230,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815335.668, "dur": 3.684, + "args": { + "External id": 992231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815343.699, "dur": 6.462, + "args": { + "External id": 992232,"Record function id": 0, "Ev Idx": 10215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815345.384, "dur": 4.373, + "args": { + "External id": 992233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815346.261, "dur": 3.040, + "args": { + "External id": 992234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815347.523, "dur": 1.694, + "args": { + "External id": 992235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815353.215, "dur": 6.187, + "args": { + "External id": 992236,"Record function id": 0, "Ev Idx": 10219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941815354.866, "dur": 4.122, + "args": { + "External id": 992237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815355.895, "dur": 2.515, + "args": { + "External id": 992238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941815357.236, "dur": 1.090, + "args": { + "External id": 992239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941815363.346, "dur": 68532.811, + "args": { + "External id": 992240,"Record function id": 0, "Sequence number": 10552490, "Fwd thread id": 1, "Ev Idx": 10223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941815365.099, "dur": 68520.958, + "args": { + "External id": 992241,"Sequence number": 10552490, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10224 + } + }, + { + "ph": "f", "id": 429, "pid": 2338706, "tid": 2379422, "ts": 6345941815365.099, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 2338706, "tid": 2379422, + "ts": 6345941815395.132, "dur": 40.357, + "args": { + "External id": 992242,"Record function id": 0, "Ev Idx": 10225 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 2338706, "tid": 2379422, + "ts": 6345941815444.059, "dur": 67.243, + "args": { + "External id": 992243,"Record function id": 0, "Ev Idx": 10226 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 2338706, "tid": 2379422, + "ts": 6345941815517.689, "dur": 68360.962, + "args": { + "External id": 992244,"Record function id": 0, "Ev Idx": 10227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941815607.966, "dur": 7.082, + "args": { + "External id": 992245,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941815624.994, "dur": 4.323, + "args": { + "External id": 992246,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941815644.409, "dur": 67184.531, + "args": { + "External id": 992247,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941815659.019, "dur": 67156.194, + "args": { + "External id": 992248,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941815778.767, "dur": 17.916, + "args": { + "External id": 992249,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941815816.057, "dur": 66955.552, + "args": { + "External id": 992250,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941815821.790, "dur": 66948.744, + "args": { + "External id": 992251,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941815826.582, "dur": 8.605, + "args": { + "External id": 992252,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941815837.327, "dur": 66928.015, + "args": { + "External id": 992253,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941882941.978, "dur": 12.404, + "args": { + "External id": 992254,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941882945.768, "dur": 8.147, + "args": { + "External id": 992255,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941882984.968, "dur": 493.856, + "args": { + "External id": 992256,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941883104.008, "dur": 369.069, + "args": { + "External id": 992257,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10240, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941883119.307, "dur": 348.196, + "args": { + "External id": 992258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941883503.170, "dur": 2.273, + "args": { + "External id": 992259,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10242, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941883573.873, "dur": 7.368, + "args": { + "External id": 992260,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941883593.886, "dur": 35.418, + "args": { + "External id": 992261,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941883639.399, "dur": 5.019, + "args": { + "External id": 992262,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941883651.295, "dur": 12.840, + "args": { + "External id": 992263,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941883670.270, "dur": 0.825, + "args": { + "External id": 992264,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941883675.626, "dur": 9.797, + "args": { + "External id": 992265,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941883690.777, "dur": 0.907, + "args": { + "External id": 992266,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941883695.757, "dur": 13.283, + "args": { + "External id": 992267,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941883713.881, "dur": 0.979, + "args": { + "External id": 992268,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941883719.098, "dur": 11.461, + "args": { + "External id": 992269,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941883735.225, "dur": 1.136, + "args": { + "External id": 992270,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941883740.712, "dur": 12.905, + "args": { + "External id": 992271,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941883758.490, "dur": 1.145, + "args": { + "External id": 992272,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941883763.350, "dur": 11.407, + "args": { + "External id": 992273,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941883779.397, "dur": 1.003, + "args": { + "External id": 992274,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941883784.185, "dur": 11.937, + "args": { + "External id": 992275,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941883802.555, "dur": 1.077, + "args": { + "External id": 992276,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941883807.548, "dur": 10.976, + "args": { + "External id": 992277,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941883911.085, "dur": 2999.992, + "args": { + "External id": 992278,"Record function id": 0, "Ev Idx": 10261 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 2338706, "tid": 2379422, + "ts": 6345941883931.869, "dur": 1186.805, + "args": { + "External id": 992279,"Record function id": 0, "Ev Idx": 10262 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338706, "tid": 2379422, + "ts": 6345941883948.556, "dur": 383.625, + "args": { + "External id": 992280,"Record function id": 0, "Ev Idx": 10263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941884088.454, "dur": 7.752, + "args": { + "External id": 992281,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941884101.732, "dur": 0.876, + "args": { + "External id": 992282,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941884104.562, "dur": 0.796, + "args": { + "External id": 992283,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941884107.535, "dur": 0.725, + "args": { + "External id": 992284,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941884109.933, "dur": 1.073, + "args": { + "External id": 992285,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941884112.582, "dur": 0.660, + "args": { + "External id": 992286,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941884115.005, "dur": 0.869, + "args": { + "External id": 992287,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941884117.478, "dur": 1.973, + "args": { + "External id": 992288,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941884121.173, "dur": 2.667, + "args": { + "External id": 992289,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941884127.215, "dur": 0.850, + "args": { + "External id": 992290,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941884148.240, "dur": 152.102, + "args": { + "External id": 992291,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941884166.178, "dur": 129.290, + "args": { + "External id": 992292,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941884183.234, "dur": 15.477, + "args": { + "External id": 992293,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941884202.375, "dur": 65.960, + "args": { + "External id": 992294,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941884205.612, "dur": 62.357, + "args": { + "External id": 992295,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884210.351, "dur": 6.361, + "args": { + "External id": 992296,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941884218.489, "dur": 48.705, + "args": { + "External id": 992297,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10280 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 2338706, "tid": 2379422, + "ts": 6345941884428.493, "dur": 681.597, + "args": { + "External id": 992298,"Record function id": 0, "Ev Idx": 10281 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338706, "tid": 2379422, + "ts": 6345941884448.271, "dur": 645.018, + "args": { + "External id": 992299,"Record function id": 0, "Ev Idx": 10282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941884513.273, "dur": 5.632, + "args": { + "External id": 992300,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941884534.384, "dur": 29.337, + "args": { + "External id": 992301,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884539.106, "dur": 1.619, + "args": { + "External id": 992302,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884543.224, "dur": 0.503, + "args": { + "External id": 992303,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884545.648, "dur": 0.345, + "args": { + "External id": 992304,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884547.094, "dur": 2.462, + "args": { + "External id": 992305,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884551.269, "dur": 0.582, + "args": { + "External id": 992306,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884553.478, "dur": 0.484, + "args": { + "External id": 992307,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884554.760, "dur": 0.351, + "args": { + "External id": 992308,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884557.255, "dur": 0.422, + "args": { + "External id": 992309,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884559.795, "dur": 0.426, + "args": { + "External id": 992310,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941884616.900, "dur": 41.240, + "args": { + "External id": 992311,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345941884691.205, "dur": 108.428, + "args": { + "External id": 992312,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941884701.067, "dur": 3.921, + "args": { + "External id": 992313,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345941884710.353, "dur": 10.531, + "args": { + "External id": 992314,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345941884714.753, "dur": 5.712, + "args": { + "External id": 992315,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884718.393, "dur": 0.825, + "args": { + "External id": 992316,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941884727.670, "dur": 26.470, + "args": { + "External id": 992317,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884730.162, "dur": 2.587, + "args": { + "External id": 992318,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884734.234, "dur": 0.548, + "args": { + "External id": 992319,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884736.090, "dur": 1.950, + "args": { + "External id": 992320,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884739.747, "dur": 0.482, + "args": { + "External id": 992321,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884741.978, "dur": 0.328, + "args": { + "External id": 992322,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884743.234, "dur": 0.475, + "args": { + "External id": 992323,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884745.386, "dur": 0.576, + "args": { + "External id": 992324,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884747.414, "dur": 0.435, + "args": { + "External id": 992325,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941884748.949, "dur": 2.613, + "args": { + "External id": 992326,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941884764.042, "dur": 28.617, + "args": { + "External id": 992327,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941884845.949, "dur": 118.829, + "args": { + "External id": 992328,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941884874.517, "dur": 86.650, + "args": { + "External id": 992329,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10312, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941884884.047, "dur": 72.245, + "args": { + "External id": 992330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941884982.748, "dur": 1.875, + "args": { + "External id": 992331,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10314, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941885126.826, "dur": 1761.155, + "args": { + "External id": 992332,"Sequence number": 10552489, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10315 + } + }, + { + "ph": "f", "id": 430, "pid": 2338706, "tid": 2379422, "ts": 6345941885126.826, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941885241.628, "dur": 108.117, + "args": { + "External id": 992333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941885394.297, "dur": 41.097, + "args": { + "External id": 992334,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345941885454.601, "dur": 48.918, + "args": { + "External id": 992335,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941885513.922, "dur": 31.067, + "args": { + "External id": 992336,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941885551.609, "dur": 32.579, + "args": { + "External id": 992337,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941885590.843, "dur": 27.278, + "args": { + "External id": 992338,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941885626.672, "dur": 32.613, + "args": { + "External id": 992339,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941885686.843, "dur": 22.655, + "args": { + "External id": 992340,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941885730.692, "dur": 28.882, + "args": { + "External id": 992341,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941885782.658, "dur": 19.167, + "args": { + "External id": 992342,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941885816.583, "dur": 15.099, + "args": { + "External id": 992343,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941885842.438, "dur": 35.512, + "args": { + "External id": 992344,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941885882.174, "dur": 32.977, + "args": { + "External id": 992345,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345941885947.870, "dur": 330.382, + "args": { + "External id": 992346,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941886049.944, "dur": 47.689, + "args": { + "External id": 992347,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941886101.507, "dur": 3.941, + "args": { + "External id": 992348,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941886106.857, "dur": 2.680, + "args": { + "External id": 992349,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941886110.856, "dur": 8.230, + "args": { + "External id": 992350,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941886166.998, "dur": 4.899, + "args": { + "External id": 992351,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941886169.016, "dur": 2.698, + "args": { + "External id": 992352,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941886173.741, "dur": 33.394, + "args": { + "External id": 992353,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941886179.581, "dur": 1.780, + "args": { + "External id": 992354,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941886209.111, "dur": 2.013, + "args": { + "External id": 992355,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941886210.328, "dur": 0.703, + "args": { + "External id": 992356,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941886212.250, "dur": 17.401, + "args": { + "External id": 992357,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941886214.414, "dur": 0.819, + "args": { + "External id": 992358,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941886318.644, "dur": 29.017, + "args": { + "External id": 992359,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941886365.005, "dur": 19.085, + "args": { + "External id": 992360,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941886393.678, "dur": 62.871, + "args": { + "External id": 992361,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941886470.232, "dur": 44.751, + "args": { + "External id": 992362,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941886529.253, "dur": 23.065, + "args": { + "External id": 992363,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941886561.996, "dur": 35.237, + "args": { + "External id": 992364,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941886605.205, "dur": 29.547, + "args": { + "External id": 992365,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941886642.427, "dur": 30.730, + "args": { + "External id": 992366,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345941886693.429, "dur": 25.704, + "args": { + "External id": 992367,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941886735.262, "dur": 25.222, + "args": { + "External id": 992368,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941886775.426, "dur": 17.287, + "args": { + "External id": 992369,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941886807.783, "dur": 14.433, + "args": { + "External id": 992370,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345941886839.130, "dur": 16.512, + "args": { + "External id": 992371,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941886934.307, "dur": 16.247, + "args": { + "External id": 992372,"Record function id": 0, "Ev Idx": 10355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941886938.194, "dur": 11.425, + "args": { + "External id": 992373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941886942.832, "dur": 5.640, + "args": { + "External id": 992374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941886944.592, "dur": 3.769, + "args": { + "External id": 992375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941886954.524, "dur": 6.746, + "args": { + "External id": 992376,"Record function id": 0, "Ev Idx": 10359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941886956.367, "dur": 4.372, + "args": { + "External id": 992377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941886957.333, "dur": 2.734, + "args": { + "External id": 992378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941886958.555, "dur": 1.400, + "args": { + "External id": 992379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941886964.609, "dur": 5.313, + "args": { + "External id": 992380,"Record function id": 0, "Ev Idx": 10363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941886966.313, "dur": 3.061, + "args": { + "External id": 992381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941886966.830, "dur": 2.065, + "args": { + "External id": 992382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941886967.760, "dur": 1.036, + "args": { + "External id": 992383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941886973.261, "dur": 5.058, + "args": { + "External id": 992384,"Record function id": 0, "Ev Idx": 10367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941886974.595, "dur": 3.300, + "args": { + "External id": 992385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941886975.074, "dur": 2.367, + "args": { + "External id": 992386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941886976.274, "dur": 1.093, + "args": { + "External id": 992387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941886981.479, "dur": 4.853, + "args": { + "External id": 992388,"Record function id": 0, "Ev Idx": 10371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941886982.834, "dur": 3.075, + "args": { + "External id": 992389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941886983.340, "dur": 2.123, + "args": { + "External id": 992390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941886984.167, "dur": 1.187, + "args": { + "External id": 992391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941886989.507, "dur": 6.837, + "args": { + "External id": 992392,"Record function id": 0, "Ev Idx": 10375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941886990.557, "dur": 5.337, + "args": { + "External id": 992393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941886991.014, "dur": 4.136, + "args": { + "External id": 992394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941886991.773, "dur": 3.306, + "args": { + "External id": 992395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941886999.642, "dur": 4.220, + "args": { + "External id": 992396,"Record function id": 0, "Ev Idx": 10379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941887001.021, "dur": 2.403, + "args": { + "External id": 992397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941887001.488, "dur": 1.507, + "args": { + "External id": 992398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941887002.170, "dur": 0.744, + "args": { + "External id": 992399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941887006.962, "dur": 24.697, + "args": { + "External id": 992400,"Record function id": 0, "Ev Idx": 10383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941887026.414, "dur": 4.581, + "args": { + "External id": 992401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941887027.618, "dur": 2.378, + "args": { + "External id": 992402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941887028.366, "dur": 1.361, + "args": { + "External id": 992403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941887036.767, "dur": 4.117, + "args": { + "External id": 992404,"Record function id": 0, "Ev Idx": 10387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941887037.953, "dur": 2.492, + "args": { + "External id": 992405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941887038.682, "dur": 1.325, + "args": { + "External id": 992406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941887039.021, "dur": 0.913, + "args": { + "External id": 992407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941887045.514, "dur": 68200.421, + "args": { + "External id": 992408,"Record function id": 0, "Sequence number": 10552488, "Fwd thread id": 1, "Ev Idx": 10391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941887046.894, "dur": 68189.303, + "args": { + "External id": 992409,"Sequence number": 10552488, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10392 + } + }, + { + "ph": "f", "id": 431, "pid": 2338706, "tid": 2379422, "ts": 6345941887046.894, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 2338706, "tid": 2379422, + "ts": 6345941887114.946, "dur": 40.658, + "args": { + "External id": 992410,"Record function id": 0, "Ev Idx": 10393 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 2338706, "tid": 2379422, + "ts": 6345941887163.567, "dur": 66.321, + "args": { + "External id": 992411,"Record function id": 0, "Ev Idx": 10394 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 2338706, "tid": 2379422, + "ts": 6345941887235.279, "dur": 67992.343, + "args": { + "External id": 992412,"Record function id": 0, "Ev Idx": 10395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941887328.000, "dur": 7.858, + "args": { + "External id": 992413,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941887346.979, "dur": 4.889, + "args": { + "External id": 992414,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941887366.341, "dur": 66891.355, + "args": { + "External id": 992415,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941887380.675, "dur": 66862.946, + "args": { + "External id": 992416,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941887542.699, "dur": 18.610, + "args": { + "External id": 992417,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941887580.822, "dur": 66616.974, + "args": { + "External id": 992418,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941887583.842, "dur": 66612.909, + "args": { + "External id": 992419,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941887588.303, "dur": 9.795, + "args": { + "External id": 992420,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941887602.552, "dur": 66589.166, + "args": { + "External id": 992421,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941954373.952, "dur": 11.952, + "args": { + "External id": 992422,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941954377.388, "dur": 8.080, + "args": { + "External id": 992423,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941954415.821, "dur": 364.710, + "args": { + "External id": 992424,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941954447.153, "dur": 328.475, + "args": { + "External id": 992425,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10408, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941954461.089, "dur": 308.612, + "args": { + "External id": 992426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941954800.190, "dur": 2.136, + "args": { + "External id": 992427,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10410, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941954859.518, "dur": 6.680, + "args": { + "External id": 992428,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941954878.412, "dur": 34.489, + "args": { + "External id": 992429,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941954923.133, "dur": 1.806, + "args": { + "External id": 992430,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941954930.344, "dur": 13.462, + "args": { + "External id": 992431,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941954949.120, "dur": 3.170, + "args": { + "External id": 992432,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941954956.948, "dur": 12.406, + "args": { + "External id": 992433,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941954974.296, "dur": 0.725, + "args": { + "External id": 992434,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941954978.961, "dur": 11.909, + "args": { + "External id": 992435,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941954995.483, "dur": 0.696, + "args": { + "External id": 992436,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941955000.649, "dur": 31.111, + "args": { + "External id": 992437,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955039.600, "dur": 1.955, + "args": { + "External id": 992438,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941955047.208, "dur": 48.078, + "args": { + "External id": 992439,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955104.326, "dur": 1.741, + "args": { + "External id": 992440,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941955110.847, "dur": 12.637, + "args": { + "External id": 992441,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955128.391, "dur": 1.237, + "args": { + "External id": 992442,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941955133.781, "dur": 13.176, + "args": { + "External id": 992443,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955151.292, "dur": 1.202, + "args": { + "External id": 992444,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345941955156.307, "dur": 11.251, + "args": { + "External id": 992445,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941955263.497, "dur": 2904.207, + "args": { + "External id": 992446,"Record function id": 0, "Ev Idx": 10429 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 2338706, "tid": 2379422, + "ts": 6345941955284.475, "dur": 1054.704, + "args": { + "External id": 992447,"Record function id": 0, "Ev Idx": 10430 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338706, "tid": 2379422, + "ts": 6345941955299.806, "dur": 318.409, + "args": { + "External id": 992448,"Record function id": 0, "Ev Idx": 10431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941955384.922, "dur": 4.461, + "args": { + "External id": 992449,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941955392.486, "dur": 3.393, + "args": { + "External id": 992450,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941955398.306, "dur": 1.094, + "args": { + "External id": 992451,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941955401.270, "dur": 0.992, + "args": { + "External id": 992452,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941955404.209, "dur": 0.981, + "args": { + "External id": 992453,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941955408.688, "dur": 0.720, + "args": { + "External id": 992454,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941955411.214, "dur": 0.840, + "args": { + "External id": 992455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941955413.597, "dur": 2.019, + "args": { + "External id": 992456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941955416.938, "dur": 1.066, + "args": { + "External id": 992457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941955421.541, "dur": 2.662, + "args": { + "External id": 992458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941955442.562, "dur": 146.844, + "args": { + "External id": 992459,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941955460.380, "dur": 124.515, + "args": { + "External id": 992460,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941955478.247, "dur": 14.866, + "args": { + "External id": 992461,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941955497.016, "dur": 60.993, + "args": { + "External id": 992462,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941955500.184, "dur": 57.461, + "args": { + "External id": 992463,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955504.293, "dur": 5.270, + "args": { + "External id": 992464,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941955511.449, "dur": 45.417, + "args": { + "External id": 992465,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10448 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 2338706, "tid": 2379422, + "ts": 6345941955701.222, "dur": 629.791, + "args": { + "External id": 992466,"Record function id": 0, "Ev Idx": 10449 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338706, "tid": 2379422, + "ts": 6345941955717.715, "dur": 599.654, + "args": { + "External id": 992467,"Record function id": 0, "Ev Idx": 10450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941955778.817, "dur": 4.471, + "args": { + "External id": 992468,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941955798.777, "dur": 28.240, + "args": { + "External id": 992469,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955803.877, "dur": 1.328, + "args": { + "External id": 992470,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955806.954, "dur": 0.750, + "args": { + "External id": 992471,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955809.774, "dur": 0.376, + "args": { + "External id": 992472,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955811.720, "dur": 0.370, + "args": { + "External id": 992473,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955813.269, "dur": 2.544, + "args": { + "External id": 992474,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955817.590, "dur": 0.456, + "args": { + "External id": 992475,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955819.637, "dur": 0.718, + "args": { + "External id": 992476,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955821.274, "dur": 0.499, + "args": { + "External id": 992477,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955823.377, "dur": 0.350, + "args": { + "External id": 992478,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941955837.537, "dur": 42.189, + "args": { + "External id": 992479,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345941955909.518, "dur": 126.690, + "args": { + "External id": 992480,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941955919.379, "dur": 3.007, + "args": { + "External id": 992481,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345941955927.330, "dur": 10.251, + "args": { + "External id": 992482,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345941955931.582, "dur": 5.423, + "args": { + "External id": 992483,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955935.069, "dur": 0.693, + "args": { + "External id": 992484,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345941955943.764, "dur": 22.507, + "args": { + "External id": 992485,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955945.787, "dur": 0.619, + "args": { + "External id": 992486,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955947.800, "dur": 2.750, + "args": { + "External id": 992487,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955952.455, "dur": 0.443, + "args": { + "External id": 992488,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955954.653, "dur": 0.550, + "args": { + "External id": 992489,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955956.074, "dur": 0.603, + "args": { + "External id": 992490,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955958.039, "dur": 0.425, + "args": { + "External id": 992491,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955959.909, "dur": 0.396, + "args": { + "External id": 992492,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955961.246, "dur": 0.399, + "args": { + "External id": 992493,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941955963.333, "dur": 0.440, + "args": { + "External id": 992494,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941955976.292, "dur": 49.020, + "args": { + "External id": 992495,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345941956120.175, "dur": 122.194, + "args": { + "External id": 992496,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941956148.446, "dur": 90.632, + "args": { + "External id": 992497,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10480, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345941956159.010, "dur": 75.718, + "args": { + "External id": 992498,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345941956260.420, "dur": 2.117, + "args": { + "External id": 992499,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10482, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941956346.456, "dur": 1792.558, + "args": { + "External id": 992500,"Sequence number": 10552487, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10483 + } + }, + { + "ph": "f", "id": 432, "pid": 2338706, "tid": 2379422, "ts": 6345941956346.456, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941956461.879, "dur": 103.723, + "args": { + "External id": 992501,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941956607.459, "dur": 39.988, + "args": { + "External id": 992502,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345941956667.142, "dur": 47.448, + "args": { + "External id": 992503,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941956724.804, "dur": 31.807, + "args": { + "External id": 992504,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941956762.897, "dur": 31.730, + "args": { + "External id": 992505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941956801.406, "dur": 26.640, + "args": { + "External id": 992506,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941956837.048, "dur": 28.455, + "args": { + "External id": 992507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941956890.695, "dur": 22.594, + "args": { + "External id": 992508,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345941956933.172, "dur": 28.657, + "args": { + "External id": 992509,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941956983.660, "dur": 18.567, + "args": { + "External id": 992510,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941957038.247, "dur": 53.391, + "args": { + "External id": 992511,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941957123.712, "dur": 47.415, + "args": { + "External id": 992512,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941957175.940, "dur": 34.092, + "args": { + "External id": 992513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345941957242.634, "dur": 252.290, + "args": { + "External id": 992514,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941957321.624, "dur": 6.661, + "args": { + "External id": 992515,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941957330.495, "dur": 2.799, + "args": { + "External id": 992516,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941957334.657, "dur": 2.411, + "args": { + "External id": 992517,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941957338.225, "dur": 4.902, + "args": { + "External id": 992518,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941957387.665, "dur": 5.122, + "args": { + "External id": 992519,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941957389.648, "dur": 2.881, + "args": { + "External id": 992520,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941957394.494, "dur": 37.801, + "args": { + "External id": 992521,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941957399.881, "dur": 1.754, + "args": { + "External id": 992522,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345941957433.906, "dur": 1.725, + "args": { + "External id": 992523,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941957434.713, "dur": 0.829, + "args": { + "External id": 992524,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345941957436.909, "dur": 13.929, + "args": { + "External id": 992525,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941957438.671, "dur": 0.634, + "args": { + "External id": 992526,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345941957533.158, "dur": 28.691, + "args": { + "External id": 992527,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941957579.016, "dur": 17.080, + "args": { + "External id": 992528,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941957605.001, "dur": 38.815, + "args": { + "External id": 992529,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941957651.667, "dur": 38.529, + "args": { + "External id": 992530,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941957699.464, "dur": 21.765, + "args": { + "External id": 992531,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941957730.354, "dur": 32.629, + "args": { + "External id": 992532,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941957771.244, "dur": 28.156, + "args": { + "External id": 992533,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345941957807.605, "dur": 47.251, + "args": { + "External id": 992534,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345941957880.070, "dur": 23.561, + "args": { + "External id": 992535,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941957921.107, "dur": 27.273, + "args": { + "External id": 992536,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345941957962.845, "dur": 16.679, + "args": { + "External id": 992537,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345941957998.067, "dur": 33.010, + "args": { + "External id": 992538,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345941958047.839, "dur": 52.236, + "args": { + "External id": 992539,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958191.764, "dur": 17.106, + "args": { + "External id": 992540,"Record function id": 0, "Ev Idx": 10523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958196.135, "dur": 11.875, + "args": { + "External id": 992541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958200.891, "dur": 5.983, + "args": { + "External id": 992542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958202.626, "dur": 4.143, + "args": { + "External id": 992543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958212.772, "dur": 10.048, + "args": { + "External id": 992544,"Record function id": 0, "Ev Idx": 10527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958218.169, "dur": 4.125, + "args": { + "External id": 992545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958219.066, "dur": 2.681, + "args": { + "External id": 992546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958220.278, "dur": 1.329, + "args": { + "External id": 992547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958226.067, "dur": 5.651, + "args": { + "External id": 992548,"Record function id": 0, "Ev Idx": 10531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958227.797, "dur": 3.482, + "args": { + "External id": 992549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958228.698, "dur": 2.157, + "args": { + "External id": 992550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958229.713, "dur": 1.061, + "args": { + "External id": 992551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958234.819, "dur": 5.269, + "args": { + "External id": 992552,"Record function id": 0, "Ev Idx": 10535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958236.364, "dur": 3.305, + "args": { + "External id": 992553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958236.914, "dur": 2.331, + "args": { + "External id": 992554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958237.787, "dur": 1.384, + "args": { + "External id": 992555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958243.185, "dur": 5.503, + "args": { + "External id": 992556,"Record function id": 0, "Ev Idx": 10539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958244.763, "dur": 3.515, + "args": { + "External id": 992557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958245.277, "dur": 2.487, + "args": { + "External id": 992558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958246.446, "dur": 1.234, + "args": { + "External id": 992559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958251.704, "dur": 7.187, + "args": { + "External id": 992560,"Record function id": 0, "Ev Idx": 10543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958253.118, "dur": 5.336, + "args": { + "External id": 992561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958253.547, "dur": 4.434, + "args": { + "External id": 992562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958254.441, "dur": 3.442, + "args": { + "External id": 992563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958262.138, "dur": 5.647, + "args": { + "External id": 992564,"Record function id": 0, "Ev Idx": 10547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958264.018, "dur": 3.342, + "args": { + "External id": 992565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958264.672, "dur": 2.300, + "args": { + "External id": 992566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958265.831, "dur": 1.059, + "args": { + "External id": 992567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958270.908, "dur": 9.530, + "args": { + "External id": 992568,"Record function id": 0, "Ev Idx": 10551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958276.598, "dur": 3.427, + "args": { + "External id": 992569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958277.443, "dur": 2.191, + "args": { + "External id": 992570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958278.367, "dur": 1.192, + "args": { + "External id": 992571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958284.038, "dur": 5.281, + "args": { + "External id": 992572,"Record function id": 0, "Ev Idx": 10555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345941958285.859, "dur": 3.040, + "args": { + "External id": 992573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958286.363, "dur": 2.135, + "args": { + "External id": 992574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345941958287.202, "dur": 1.214, + "args": { + "External id": 992575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941958293.673, "dur": 70489.630, + "args": { + "External id": 992576,"Record function id": 0, "Sequence number": 10552486, "Fwd thread id": 1, "Ev Idx": 10559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345941958295.074, "dur": 70479.503, + "args": { + "External id": 992577,"Sequence number": 10552486, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10560 + } + }, + { + "ph": "f", "id": 433, "pid": 2338706, "tid": 2379422, "ts": 6345941958295.074, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 2338706, "tid": 2379422, + "ts": 6345941958326.160, "dur": 38.606, + "args": { + "External id": 992578,"Record function id": 0, "Ev Idx": 10561 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 2338706, "tid": 2379422, + "ts": 6345941958372.495, "dur": 66.477, + "args": { + "External id": 992579,"Record function id": 0, "Ev Idx": 10562 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 2338706, "tid": 2379422, + "ts": 6345941958445.856, "dur": 70320.892, + "args": { + "External id": 992580,"Record function id": 0, "Ev Idx": 10563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941958537.320, "dur": 7.339, + "args": { + "External id": 992581,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345941958554.745, "dur": 4.929, + "args": { + "External id": 992582,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941958573.848, "dur": 69231.210, + "args": { + "External id": 992583,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345941958588.687, "dur": 69202.743, + "args": { + "External id": 992584,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345941958700.216, "dur": 18.956, + "args": { + "External id": 992585,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345941958740.939, "dur": 69007.036, + "args": { + "External id": 992586,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345941958743.968, "dur": 69003.001, + "args": { + "External id": 992587,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345941958748.526, "dur": 8.818, + "args": { + "External id": 992588,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345941958759.626, "dur": 68982.243, + "args": { + "External id": 992589,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942027914.302, "dur": 11.949, + "args": { + "External id": 992590,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942027917.768, "dur": 8.076, + "args": { + "External id": 992591,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345942027958.961, "dur": 416.685, + "args": { + "External id": 992592,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942027994.002, "dur": 375.913, + "args": { + "External id": 992593,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10576, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345942028006.037, "dur": 357.560, + "args": { + "External id": 992594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942028398.888, "dur": 2.341, + "args": { + "External id": 992595,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10578, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942028464.289, "dur": 6.925, + "args": { + "External id": 992596,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942028484.341, "dur": 37.281, + "args": { + "External id": 992597,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942028532.012, "dur": 3.533, + "args": { + "External id": 992598,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942028541.782, "dur": 13.990, + "args": { + "External id": 992599,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942028562.018, "dur": 0.928, + "args": { + "External id": 992600,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942028567.733, "dur": 11.981, + "args": { + "External id": 992601,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942028584.713, "dur": 0.986, + "args": { + "External id": 992602,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942028589.991, "dur": 11.092, + "args": { + "External id": 992603,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942028605.499, "dur": 0.828, + "args": { + "External id": 992604,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942028611.246, "dur": 9.794, + "args": { + "External id": 992605,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942028625.360, "dur": 0.922, + "args": { + "External id": 992606,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942028630.360, "dur": 12.441, + "args": { + "External id": 992607,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942028647.484, "dur": 0.869, + "args": { + "External id": 992608,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942028652.269, "dur": 11.157, + "args": { + "External id": 992609,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942028667.815, "dur": 0.738, + "args": { + "External id": 992610,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942028672.398, "dur": 12.037, + "args": { + "External id": 992611,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942028690.020, "dur": 0.886, + "args": { + "External id": 992612,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942028694.755, "dur": 11.747, + "args": { + "External id": 992613,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942028797.955, "dur": 2984.498, + "args": { + "External id": 992614,"Record function id": 0, "Ev Idx": 10597 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 2338706, "tid": 2379422, + "ts": 6345942028820.353, "dur": 1086.159, + "args": { + "External id": 992615,"Record function id": 0, "Ev Idx": 10598 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338706, "tid": 2379422, + "ts": 6345942028836.546, "dur": 376.932, + "args": { + "External id": 992616,"Record function id": 0, "Ev Idx": 10599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942028918.910, "dur": 5.902, + "args": { + "External id": 992617,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942028927.900, "dur": 1.070, + "args": { + "External id": 992618,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942028931.104, "dur": 1.019, + "args": { + "External id": 992619,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942028933.674, "dur": 0.746, + "args": { + "External id": 992620,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942028936.205, "dur": 0.938, + "args": { + "External id": 992621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942028938.527, "dur": 1.081, + "args": { + "External id": 992622,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942028941.069, "dur": 0.818, + "args": { + "External id": 992623,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942028945.807, "dur": 1.669, + "args": { + "External id": 992624,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942028948.938, "dur": 2.837, + "args": { + "External id": 992625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942028953.321, "dur": 0.619, + "args": { + "External id": 992626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942028971.895, "dur": 206.278, + "args": { + "External id": 992627,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942028989.167, "dur": 183.199, + "args": { + "External id": 992628,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942029004.258, "dur": 34.590, + "args": { + "External id": 992629,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345942029043.520, "dur": 101.577, + "args": { + "External id": 992630,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942029047.768, "dur": 96.893, + "args": { + "External id": 992631,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029051.855, "dur": 39.240, + "args": { + "External id": 992632,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942029094.147, "dur": 49.901, + "args": { + "External id": 992633,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10616 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 2338706, "tid": 2379422, + "ts": 6345942029305.652, "dur": 592.733, + "args": { + "External id": 992634,"Record function id": 0, "Ev Idx": 10617 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338706, "tid": 2379422, + "ts": 6345942029323.686, "dur": 562.124, + "args": { + "External id": 992635,"Record function id": 0, "Ev Idx": 10618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942029387.003, "dur": 6.667, + "args": { + "External id": 992636,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345942029409.615, "dur": 28.640, + "args": { + "External id": 992637,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029414.691, "dur": 1.901, + "args": { + "External id": 992638,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029418.911, "dur": 0.734, + "args": { + "External id": 992639,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029421.204, "dur": 0.444, + "args": { + "External id": 992640,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029422.567, "dur": 2.450, + "args": { + "External id": 992641,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029426.445, "dur": 0.406, + "args": { + "External id": 992642,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029428.383, "dur": 0.365, + "args": { + "External id": 992643,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029429.831, "dur": 0.608, + "args": { + "External id": 992644,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029432.215, "dur": 0.487, + "args": { + "External id": 992645,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029434.352, "dur": 0.283, + "args": { + "External id": 992646,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942029449.538, "dur": 40.057, + "args": { + "External id": 992647,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345942029520.081, "dur": 106.837, + "args": { + "External id": 992648,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942029530.473, "dur": 3.070, + "args": { + "External id": 992649,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345942029538.620, "dur": 10.569, + "args": { + "External id": 992650,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345942029542.670, "dur": 6.105, + "args": { + "External id": 992651,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029546.658, "dur": 0.921, + "args": { + "External id": 992652,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345942029555.404, "dur": 25.147, + "args": { + "External id": 992653,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029557.431, "dur": 3.072, + "args": { + "External id": 992654,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029561.944, "dur": 0.707, + "args": { + "External id": 992655,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029563.952, "dur": 0.826, + "args": { + "External id": 992656,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029566.156, "dur": 0.353, + "args": { + "External id": 992657,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029568.353, "dur": 0.567, + "args": { + "External id": 992658,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029569.781, "dur": 0.593, + "args": { + "External id": 992659,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029572.128, "dur": 0.621, + "args": { + "External id": 992660,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029574.324, "dur": 0.406, + "args": { + "External id": 992661,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942029575.637, "dur": 2.339, + "args": { + "External id": 992662,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942029590.684, "dur": 28.868, + "args": { + "External id": 992663,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345942029670.371, "dur": 143.598, + "args": { + "External id": 992664,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942029724.423, "dur": 86.237, + "args": { + "External id": 992665,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10648, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345942029734.528, "dur": 72.155, + "args": { + "External id": 992666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942029831.597, "dur": 2.386, + "args": { + "External id": 992667,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10650, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942029913.700, "dur": 1845.927, + "args": { + "External id": 992668,"Sequence number": 10552485, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10651 + } + }, + { + "ph": "f", "id": 434, "pid": 2338706, "tid": 2379422, "ts": 6345942029913.700, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942030048.443, "dur": 141.758, + "args": { + "External id": 992669,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345942030242.009, "dur": 44.798, + "args": { + "External id": 992670,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345942030308.627, "dur": 48.691, + "args": { + "External id": 992671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942030367.571, "dur": 32.282, + "args": { + "External id": 992672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942030406.343, "dur": 33.249, + "args": { + "External id": 992673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942030445.754, "dur": 27.311, + "args": { + "External id": 992674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942030481.985, "dur": 29.046, + "args": { + "External id": 992675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345942030536.621, "dur": 24.654, + "args": { + "External id": 992676,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345942030597.367, "dur": 33.405, + "args": { + "External id": 992677,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942030653.816, "dur": 21.338, + "args": { + "External id": 992678,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942030690.510, "dur": 17.426, + "args": { + "External id": 992679,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942030718.340, "dur": 38.452, + "args": { + "External id": 992680,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942030760.983, "dur": 32.582, + "args": { + "External id": 992681,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345942030822.344, "dur": 310.781, + "args": { + "External id": 992682,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942030901.469, "dur": 6.389, + "args": { + "External id": 992683,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942030909.926, "dur": 3.070, + "args": { + "External id": 992684,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942030914.887, "dur": 2.285, + "args": { + "External id": 992685,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942030918.219, "dur": 4.729, + "args": { + "External id": 992686,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345942030971.088, "dur": 4.564, + "args": { + "External id": 992687,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942030972.790, "dur": 2.650, + "args": { + "External id": 992688,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345942030977.380, "dur": 48.171, + "args": { + "External id": 992689,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942030982.365, "dur": 1.749, + "args": { + "External id": 992690,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345942031028.420, "dur": 1.996, + "args": { + "External id": 992691,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942031029.219, "dur": 1.068, + "args": { + "External id": 992692,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345942031031.672, "dur": 16.750, + "args": { + "External id": 992693,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942031033.674, "dur": 0.671, + "args": { + "External id": 992694,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345942031175.083, "dur": 30.280, + "args": { + "External id": 992695,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942031226.623, "dur": 20.400, + "args": { + "External id": 992696,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942031256.177, "dur": 55.510, + "args": { + "External id": 992697,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942031319.485, "dur": 56.679, + "args": { + "External id": 992698,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942031393.081, "dur": 26.869, + "args": { + "External id": 992699,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942031430.255, "dur": 36.006, + "args": { + "External id": 992700,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942031474.760, "dur": 28.295, + "args": { + "External id": 992701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942031513.801, "dur": 32.840, + "args": { + "External id": 992702,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345942031566.903, "dur": 24.411, + "args": { + "External id": 992703,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942031608.651, "dur": 24.393, + "args": { + "External id": 992704,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942031648.762, "dur": 18.250, + "args": { + "External id": 992705,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942031681.460, "dur": 12.959, + "args": { + "External id": 992706,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345942031707.789, "dur": 16.732, + "args": { + "External id": 992707,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031805.526, "dur": 16.806, + "args": { + "External id": 992708,"Record function id": 0, "Ev Idx": 10691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031809.348, "dur": 11.995, + "args": { + "External id": 992709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031814.256, "dur": 6.135, + "args": { + "External id": 992710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031816.237, "dur": 4.031, + "args": { + "External id": 992711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031826.394, "dur": 6.227, + "args": { + "External id": 992712,"Record function id": 0, "Ev Idx": 10695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031828.135, "dur": 3.961, + "args": { + "External id": 992713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031829.035, "dur": 2.543, + "args": { + "External id": 992714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031830.057, "dur": 1.417, + "args": { + "External id": 992715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031835.984, "dur": 5.645, + "args": { + "External id": 992716,"Record function id": 0, "Ev Idx": 10699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031837.952, "dur": 3.222, + "args": { + "External id": 992717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031838.576, "dur": 2.146, + "args": { + "External id": 992718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031839.556, "dur": 1.061, + "args": { + "External id": 992719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031844.941, "dur": 5.485, + "args": { + "External id": 992720,"Record function id": 0, "Ev Idx": 10703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031846.236, "dur": 3.758, + "args": { + "External id": 992721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031846.722, "dur": 2.692, + "args": { + "External id": 992722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031848.214, "dur": 1.115, + "args": { + "External id": 992723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031853.517, "dur": 5.769, + "args": { + "External id": 992724,"Record function id": 0, "Ev Idx": 10707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031855.283, "dur": 3.573, + "args": { + "External id": 992725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031855.705, "dur": 2.714, + "args": { + "External id": 992726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031856.951, "dur": 1.384, + "args": { + "External id": 992727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031862.429, "dur": 6.434, + "args": { + "External id": 992728,"Record function id": 0, "Ev Idx": 10711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031864.625, "dur": 3.824, + "args": { + "External id": 992729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031865.123, "dur": 2.442, + "args": { + "External id": 992730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031866.212, "dur": 1.271, + "args": { + "External id": 992731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031872.130, "dur": 7.885, + "args": { + "External id": 992732,"Record function id": 0, "Ev Idx": 10715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031873.496, "dur": 6.092, + "args": { + "External id": 992733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031873.969, "dur": 5.007, + "args": { + "External id": 992734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031875.105, "dur": 3.766, + "args": { + "External id": 992735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031883.145, "dur": 5.121, + "args": { + "External id": 992736,"Record function id": 0, "Ev Idx": 10719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031884.721, "dur": 3.111, + "args": { + "External id": 992737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031885.189, "dur": 2.079, + "args": { + "External id": 992738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031886.044, "dur": 1.150, + "args": { + "External id": 992739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031891.749, "dur": 4.610, + "args": { + "External id": 992740,"Record function id": 0, "Ev Idx": 10723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942031892.976, "dur": 2.971, + "args": { + "External id": 992741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031893.599, "dur": 1.898, + "args": { + "External id": 992742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942031894.388, "dur": 1.026, + "args": { + "External id": 992743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942031900.854, "dur": 73972.789, + "args": { + "External id": 992744,"Record function id": 0, "Sequence number": 10552484, "Fwd thread id": 1, "Ev Idx": 10727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942031902.310, "dur": 73961.889, + "args": { + "External id": 992745,"Sequence number": 10552484, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10728 + } + }, + { + "ph": "f", "id": 435, "pid": 2338706, "tid": 2379422, "ts": 6345942031902.310, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 2338706, "tid": 2379422, + "ts": 6345942031932.564, "dur": 39.059, + "args": { + "External id": 992746,"Record function id": 0, "Ev Idx": 10729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 2338706, "tid": 2379422, + "ts": 6345942031979.735, "dur": 122.486, + "args": { + "External id": 992747,"Record function id": 0, "Ev Idx": 10730 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 2338706, "tid": 2379422, + "ts": 6345942032111.070, "dur": 73744.558, + "args": { + "External id": 992748,"Record function id": 0, "Ev Idx": 10731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942032207.290, "dur": 7.166, + "args": { + "External id": 992749,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942032225.688, "dur": 5.117, + "args": { + "External id": 992750,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345942032245.077, "dur": 72551.577, + "args": { + "External id": 992751,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345942032259.818, "dur": 72523.554, + "args": { + "External id": 992752,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942032371.974, "dur": 18.211, + "args": { + "External id": 992753,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345942032411.385, "dur": 72327.173, + "args": { + "External id": 992754,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942032414.162, "dur": 72323.268, + "args": { + "External id": 992755,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942032418.974, "dur": 8.720, + "args": { + "External id": 992756,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942032429.938, "dur": 72302.303, + "args": { + "External id": 992757,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942104910.309, "dur": 12.194, + "args": { + "External id": 992758,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942104914.003, "dur": 8.152, + "args": { + "External id": 992759,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345942104956.364, "dur": 459.422, + "args": { + "External id": 992760,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942104987.955, "dur": 421.851, + "args": { + "External id": 992761,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10744, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345942104999.239, "dur": 403.615, + "args": { + "External id": 992762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942105437.574, "dur": 2.594, + "args": { + "External id": 992763,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10746, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942105509.547, "dur": 7.014, + "args": { + "External id": 992764,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942105533.361, "dur": 37.116, + "args": { + "External id": 992765,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942105580.888, "dur": 3.999, + "args": { + "External id": 992766,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942105591.148, "dur": 14.786, + "args": { + "External id": 992767,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942105611.803, "dur": 0.966, + "args": { + "External id": 992768,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942105660.787, "dur": 12.799, + "args": { + "External id": 992769,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942105681.683, "dur": 1.162, + "args": { + "External id": 992770,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942105686.939, "dur": 12.049, + "args": { + "External id": 992771,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942105703.065, "dur": 0.748, + "args": { + "External id": 992772,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942105707.375, "dur": 9.983, + "args": { + "External id": 992773,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942105721.385, "dur": 1.164, + "args": { + "External id": 992774,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942105726.547, "dur": 9.819, + "args": { + "External id": 992775,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942105740.375, "dur": 1.021, + "args": { + "External id": 992776,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942105745.328, "dur": 10.095, + "args": { + "External id": 992777,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942105761.410, "dur": 0.929, + "args": { + "External id": 992778,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942105766.165, "dur": 9.838, + "args": { + "External id": 992779,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942105779.768, "dur": 0.958, + "args": { + "External id": 992780,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942105784.502, "dur": 10.188, + "args": { + "External id": 992781,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942105888.472, "dur": 2969.309, + "args": { + "External id": 992782,"Record function id": 0, "Ev Idx": 10765 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 2338706, "tid": 2379422, + "ts": 6345942105909.077, "dur": 1070.702, + "args": { + "External id": 992783,"Record function id": 0, "Ev Idx": 10766 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338706, "tid": 2379422, + "ts": 6345942105924.701, "dur": 388.025, + "args": { + "External id": 992784,"Record function id": 0, "Ev Idx": 10767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942106028.889, "dur": 7.340, + "args": { + "External id": 992785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942106040.245, "dur": 0.952, + "args": { + "External id": 992786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942106043.346, "dur": 0.996, + "args": { + "External id": 992787,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942106045.895, "dur": 0.805, + "args": { + "External id": 992788,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942106048.346, "dur": 0.816, + "args": { + "External id": 992789,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942106050.488, "dur": 0.687, + "args": { + "External id": 992790,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942106087.333, "dur": 1.655, + "args": { + "External id": 992791,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942106092.726, "dur": 1.770, + "args": { + "External id": 992792,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942106095.884, "dur": 2.565, + "args": { + "External id": 992793,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942106099.859, "dur": 0.696, + "args": { + "External id": 992794,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942106122.044, "dur": 155.849, + "args": { + "External id": 992795,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942106139.178, "dur": 133.827, + "args": { + "External id": 992796,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942106156.274, "dur": 15.880, + "args": { + "External id": 992797,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345942106175.917, "dur": 69.333, + "args": { + "External id": 992798,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942106180.944, "dur": 63.958, + "args": { + "External id": 992799,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106184.704, "dur": 6.699, + "args": { + "External id": 992800,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942106193.102, "dur": 51.114, + "args": { + "External id": 992801,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10784 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 2338706, "tid": 2379422, + "ts": 6345942106405.240, "dur": 567.202, + "args": { + "External id": 992802,"Record function id": 0, "Ev Idx": 10785 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338706, "tid": 2379422, + "ts": 6345942106423.672, "dur": 536.282, + "args": { + "External id": 992803,"Record function id": 0, "Ev Idx": 10786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942106486.388, "dur": 5.045, + "args": { + "External id": 992804,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345942106508.135, "dur": 28.044, + "args": { + "External id": 992805,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106513.048, "dur": 1.810, + "args": { + "External id": 992806,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106516.452, "dur": 0.697, + "args": { + "External id": 992807,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106519.069, "dur": 0.435, + "args": { + "External id": 992808,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106521.037, "dur": 2.563, + "args": { + "External id": 992809,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106524.717, "dur": 0.466, + "args": { + "External id": 992810,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106526.785, "dur": 0.596, + "args": { + "External id": 992811,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106529.004, "dur": 0.471, + "args": { + "External id": 992812,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106530.503, "dur": 0.403, + "args": { + "External id": 992813,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106532.537, "dur": 0.384, + "args": { + "External id": 992814,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942106547.291, "dur": 43.531, + "args": { + "External id": 992815,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345942106620.779, "dur": 107.890, + "args": { + "External id": 992816,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942106630.929, "dur": 3.270, + "args": { + "External id": 992817,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345942106639.930, "dur": 10.032, + "args": { + "External id": 992818,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345942106643.951, "dur": 5.607, + "args": { + "External id": 992819,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106647.618, "dur": 0.786, + "args": { + "External id": 992820,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345942106656.189, "dur": 23.645, + "args": { + "External id": 992821,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106658.033, "dur": 2.860, + "args": { + "External id": 992822,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106662.304, "dur": 0.511, + "args": { + "External id": 992823,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106664.251, "dur": 0.435, + "args": { + "External id": 992824,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106666.082, "dur": 0.269, + "args": { + "External id": 992825,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106667.346, "dur": 0.392, + "args": { + "External id": 992826,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106669.244, "dur": 0.555, + "args": { + "External id": 992827,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106671.404, "dur": 0.341, + "args": { + "External id": 992828,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106672.655, "dur": 0.448, + "args": { + "External id": 992829,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942106674.876, "dur": 2.385, + "args": { + "External id": 992830,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942106690.407, "dur": 30.674, + "args": { + "External id": 992831,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345942106772.224, "dur": 117.234, + "args": { + "External id": 992832,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942106797.985, "dur": 88.153, + "args": { + "External id": 992833,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10816, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345942106807.943, "dur": 73.606, + "args": { + "External id": 992834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942106907.072, "dur": 2.239, + "args": { + "External id": 992835,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10818, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942106988.145, "dur": 1845.906, + "args": { + "External id": 992836,"Sequence number": 10552483, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10819 + } + }, + { + "ph": "f", "id": 436, "pid": 2338706, "tid": 2379422, "ts": 6345942106988.145, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942107159.360, "dur": 116.657, + "args": { + "External id": 992837,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345942107323.033, "dur": 42.788, + "args": { + "External id": 992838,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345942107385.329, "dur": 48.503, + "args": { + "External id": 992839,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942107443.871, "dur": 31.633, + "args": { + "External id": 992840,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942107481.571, "dur": 32.498, + "args": { + "External id": 992841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942107520.241, "dur": 27.293, + "args": { + "External id": 992842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942107574.149, "dur": 31.200, + "args": { + "External id": 992843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345942107634.727, "dur": 21.534, + "args": { + "External id": 992844,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345942107675.627, "dur": 29.615, + "args": { + "External id": 992845,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942107726.670, "dur": 20.086, + "args": { + "External id": 992846,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942107761.266, "dur": 15.373, + "args": { + "External id": 992847,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942107787.839, "dur": 36.531, + "args": { + "External id": 992848,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942107828.298, "dur": 32.473, + "args": { + "External id": 992849,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345942107890.135, "dur": 306.309, + "args": { + "External id": 992850,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942107970.671, "dur": 6.971, + "args": { + "External id": 992851,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942107979.958, "dur": 3.360, + "args": { + "External id": 992852,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942107984.639, "dur": 2.490, + "args": { + "External id": 992853,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942107988.367, "dur": 4.453, + "args": { + "External id": 992854,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345942108086.322, "dur": 5.562, + "args": { + "External id": 992855,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942108088.396, "dur": 3.126, + "args": { + "External id": 992856,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345942108093.575, "dur": 34.016, + "args": { + "External id": 992857,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942108099.734, "dur": 2.090, + "args": { + "External id": 992858,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345942108129.290, "dur": 2.012, + "args": { + "External id": 992859,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942108130.458, "dur": 0.754, + "args": { + "External id": 992860,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345942108132.263, "dur": 14.047, + "args": { + "External id": 992861,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942108133.901, "dur": 0.671, + "args": { + "External id": 992862,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345942108236.427, "dur": 27.279, + "args": { + "External id": 992863,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942108332.764, "dur": 29.175, + "args": { + "External id": 992864,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942108376.144, "dur": 55.106, + "args": { + "External id": 992865,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942108438.232, "dur": 40.892, + "args": { + "External id": 992866,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942108491.147, "dur": 22.081, + "args": { + "External id": 992867,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942108522.097, "dur": 32.362, + "args": { + "External id": 992868,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942108562.437, "dur": 28.070, + "args": { + "External id": 992869,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942108598.109, "dur": 30.239, + "args": { + "External id": 992870,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345942108647.409, "dur": 23.554, + "args": { + "External id": 992871,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942108686.680, "dur": 23.877, + "args": { + "External id": 992872,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942108725.796, "dur": 18.378, + "args": { + "External id": 992873,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942108758.749, "dur": 13.909, + "args": { + "External id": 992874,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345942108785.385, "dur": 16.244, + "args": { + "External id": 992875,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108880.758, "dur": 16.185, + "args": { + "External id": 992876,"Record function id": 0, "Ev Idx": 10859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108884.467, "dur": 11.513, + "args": { + "External id": 992877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108889.447, "dur": 5.577, + "args": { + "External id": 992878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108891.072, "dur": 3.796, + "args": { + "External id": 992879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108901.038, "dur": 6.226, + "args": { + "External id": 992880,"Record function id": 0, "Ev Idx": 10863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108902.644, "dur": 4.121, + "args": { + "External id": 992881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108903.304, "dur": 2.764, + "args": { + "External id": 992882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108904.751, "dur": 1.211, + "args": { + "External id": 992883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108910.693, "dur": 5.650, + "args": { + "External id": 992884,"Record function id": 0, "Ev Idx": 10867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108912.293, "dur": 3.568, + "args": { + "External id": 992885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108913.065, "dur": 2.360, + "args": { + "External id": 992886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108914.188, "dur": 1.157, + "args": { + "External id": 992887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108919.494, "dur": 5.157, + "args": { + "External id": 992888,"Record function id": 0, "Ev Idx": 10871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108921.001, "dur": 3.236, + "args": { + "External id": 992889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108921.700, "dur": 2.094, + "args": { + "External id": 992890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108922.670, "dur": 1.041, + "args": { + "External id": 992891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108927.834, "dur": 5.576, + "args": { + "External id": 992892,"Record function id": 0, "Ev Idx": 10875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108929.146, "dur": 3.831, + "args": { + "External id": 992893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108929.700, "dur": 2.845, + "args": { + "External id": 992894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108930.737, "dur": 1.721, + "args": { + "External id": 992895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108936.498, "dur": 7.446, + "args": { + "External id": 992896,"Record function id": 0, "Ev Idx": 10879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108937.651, "dur": 5.882, + "args": { + "External id": 992897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108938.463, "dur": 4.322, + "args": { + "External id": 992898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108939.531, "dur": 3.156, + "args": { + "External id": 992899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108947.151, "dur": 5.180, + "args": { + "External id": 992900,"Record function id": 0, "Ev Idx": 10883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108948.505, "dur": 3.406, + "args": { + "External id": 992901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108949.190, "dur": 2.123, + "args": { + "External id": 992902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108950.345, "dur": 0.885, + "args": { + "External id": 992903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108956.114, "dur": 4.134, + "args": { + "External id": 992904,"Record function id": 0, "Ev Idx": 10887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108957.129, "dur": 2.679, + "args": { + "External id": 992905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108957.916, "dur": 1.454, + "args": { + "External id": 992906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108958.398, "dur": 0.881, + "args": { + "External id": 992907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108963.766, "dur": 4.480, + "args": { + "External id": 992908,"Record function id": 0, "Ev Idx": 10891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942108964.945, "dur": 2.890, + "args": { + "External id": 992909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108965.431, "dur": 1.661, + "args": { + "External id": 992910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942108966.103, "dur": 0.908, + "args": { + "External id": 992911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942108972.994, "dur": 62939.811, + "args": { + "External id": 992912,"Record function id": 0, "Sequence number": 10552482, "Fwd thread id": 1, "Ev Idx": 10895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942108974.483, "dur": 62928.674, + "args": { + "External id": 992913,"Sequence number": 10552482, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10896 + } + }, + { + "ph": "f", "id": 437, "pid": 2338706, "tid": 2379422, "ts": 6345942108974.483, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 2338706, "tid": 2379422, + "ts": 6345942109003.662, "dur": 109.068, + "args": { + "External id": 992914,"Record function id": 0, "Ev Idx": 10897 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 2338706, "tid": 2379422, + "ts": 6345942109123.309, "dur": 71.043, + "args": { + "External id": 992915,"Record function id": 0, "Ev Idx": 10898 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 2338706, "tid": 2379422, + "ts": 6345942109200.502, "dur": 62694.748, + "args": { + "External id": 992916,"Record function id": 0, "Ev Idx": 10899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942109293.993, "dur": 7.881, + "args": { + "External id": 992917,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942109313.028, "dur": 5.021, + "args": { + "External id": 992918,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345942109332.799, "dur": 61717.650, + "args": { + "External id": 992919,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345942109347.960, "dur": 61688.991, + "args": { + "External id": 992920,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942109512.380, "dur": 17.999, + "args": { + "External id": 992921,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345942109552.199, "dur": 61427.386, + "args": { + "External id": 992922,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942109555.126, "dur": 61423.165, + "args": { + "External id": 992923,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942109559.946, "dur": 9.121, + "args": { + "External id": 992924,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942109571.512, "dur": 61402.031, + "args": { + "External id": 992925,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942171190.268, "dur": 12.418, + "args": { + "External id": 992926,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942171193.843, "dur": 8.294, + "args": { + "External id": 992927,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345942171234.781, "dur": 305.728, + "args": { + "External id": 992928,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942171264.937, "dur": 270.731, + "args": { + "External id": 992929,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10912, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345942171276.767, "dur": 253.387, + "args": { + "External id": 992930,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942171557.475, "dur": 2.157, + "args": { + "External id": 992931,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10914, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942171615.088, "dur": 6.464, + "args": { + "External id": 992932,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942171634.043, "dur": 31.771, + "args": { + "External id": 992933,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942171675.379, "dur": 4.291, + "args": { + "External id": 992934,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942171685.356, "dur": 11.627, + "args": { + "External id": 992935,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942171702.456, "dur": 0.980, + "args": { + "External id": 992936,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942171707.806, "dur": 11.113, + "args": { + "External id": 992937,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942171723.283, "dur": 0.866, + "args": { + "External id": 992938,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942171728.164, "dur": 10.465, + "args": { + "External id": 992939,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942171742.740, "dur": 0.803, + "args": { + "External id": 992940,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942171747.667, "dur": 9.449, + "args": { + "External id": 992941,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942171760.801, "dur": 1.180, + "args": { + "External id": 992942,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942171765.879, "dur": 10.034, + "args": { + "External id": 992943,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942171780.241, "dur": 1.053, + "args": { + "External id": 992944,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942171785.669, "dur": 9.708, + "args": { + "External id": 992945,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942171801.641, "dur": 1.007, + "args": { + "External id": 992946,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942171807.496, "dur": 12.623, + "args": { + "External id": 992947,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942171823.803, "dur": 1.227, + "args": { + "External id": 992948,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942171828.974, "dur": 11.196, + "args": { + "External id": 992949,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942171928.639, "dur": 2944.324, + "args": { + "External id": 992950,"Record function id": 0, "Ev Idx": 10933 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 2338706, "tid": 2379422, + "ts": 6345942171949.165, "dur": 1099.126, + "args": { + "External id": 992951,"Record function id": 0, "Ev Idx": 10934 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338706, "tid": 2379422, + "ts": 6345942171963.604, "dur": 386.163, + "args": { + "External id": 992952,"Record function id": 0, "Ev Idx": 10935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942172110.807, "dur": 7.541, + "args": { + "External id": 992953,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942172122.213, "dur": 1.179, + "args": { + "External id": 992954,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942172125.248, "dur": 1.236, + "args": { + "External id": 992955,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942172128.370, "dur": 0.969, + "args": { + "External id": 992956,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942172131.348, "dur": 0.749, + "args": { + "External id": 992957,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942172133.578, "dur": 0.744, + "args": { + "External id": 992958,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942172137.284, "dur": 0.784, + "args": { + "External id": 992959,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942172139.714, "dur": 2.210, + "args": { + "External id": 992960,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942172143.455, "dur": 2.540, + "args": { + "External id": 992961,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942172147.608, "dur": 0.703, + "args": { + "External id": 992962,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942172169.950, "dur": 147.580, + "args": { + "External id": 992963,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942172187.792, "dur": 124.773, + "args": { + "External id": 992964,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942172204.058, "dur": 15.357, + "args": { + "External id": 992965,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345942172223.342, "dur": 62.258, + "args": { + "External id": 992966,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942172226.220, "dur": 59.038, + "args": { + "External id": 992967,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172230.019, "dur": 5.236, + "args": { + "External id": 992968,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942172237.044, "dur": 47.644, + "args": { + "External id": 992969,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10952 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 2338706, "tid": 2379422, + "ts": 6345942172440.552, "dur": 599.037, + "args": { + "External id": 992970,"Record function id": 0, "Ev Idx": 10953 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338706, "tid": 2379422, + "ts": 6345942172457.751, "dur": 546.403, + "args": { + "External id": 992971,"Record function id": 0, "Ev Idx": 10954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942172523.517, "dur": 4.482, + "args": { + "External id": 992972,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345942172544.070, "dur": 29.196, + "args": { + "External id": 992973,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172549.454, "dur": 1.927, + "args": { + "External id": 992974,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172553.599, "dur": 0.526, + "args": { + "External id": 992975,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172555.959, "dur": 0.327, + "args": { + "External id": 992976,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172557.532, "dur": 2.447, + "args": { + "External id": 992977,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172561.691, "dur": 0.503, + "args": { + "External id": 992978,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172563.479, "dur": 0.269, + "args": { + "External id": 992979,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172564.981, "dur": 0.516, + "args": { + "External id": 992980,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172567.413, "dur": 0.374, + "args": { + "External id": 992981,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172569.692, "dur": 0.358, + "args": { + "External id": 992982,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942172584.219, "dur": 41.567, + "args": { + "External id": 992983,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345942172654.098, "dur": 111.745, + "args": { + "External id": 992984,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942172664.332, "dur": 3.066, + "args": { + "External id": 992985,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345942172672.484, "dur": 10.297, + "args": { + "External id": 992986,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345942172676.873, "dur": 5.496, + "args": { + "External id": 992987,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172680.536, "dur": 0.594, + "args": { + "External id": 992988,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345942172689.471, "dur": 26.782, + "args": { + "External id": 992989,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172691.456, "dur": 2.410, + "args": { + "External id": 992990,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172695.649, "dur": 0.407, + "args": { + "External id": 992991,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172698.070, "dur": 0.354, + "args": { + "External id": 992992,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172699.268, "dur": 0.433, + "args": { + "External id": 992993,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172701.293, "dur": 0.554, + "args": { + "External id": 992994,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172703.419, "dur": 0.271, + "args": { + "External id": 992995,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172707.698, "dur": 0.392, + "args": { + "External id": 992996,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172709.402, "dur": 0.382, + "args": { + "External id": 992997,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942172711.301, "dur": 2.399, + "args": { + "External id": 992998,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942172726.271, "dur": 31.807, + "args": { + "External id": 992999,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345942172809.420, "dur": 121.316, + "args": { + "External id": 993000,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942172837.166, "dur": 90.453, + "args": { + "External id": 993001,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10984, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345942172846.844, "dur": 72.763, + "args": { + "External id": 993002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942172947.547, "dur": 2.046, + "args": { + "External id": 993003,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10986, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942173090.670, "dur": 1760.668, + "args": { + "External id": 993004,"Sequence number": 10552481, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10987 + } + }, + { + "ph": "f", "id": 438, "pid": 2338706, "tid": 2379422, "ts": 6345942173090.670, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942173208.188, "dur": 113.865, + "args": { + "External id": 993005,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345942173367.379, "dur": 40.566, + "args": { + "External id": 993006,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345942173428.160, "dur": 47.588, + "args": { + "External id": 993007,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942173503.310, "dur": 33.738, + "args": { + "External id": 993008,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942173544.233, "dur": 32.021, + "args": { + "External id": 993009,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942173582.619, "dur": 26.685, + "args": { + "External id": 993010,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942173618.490, "dur": 29.048, + "args": { + "External id": 993011,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345942173675.992, "dur": 24.574, + "args": { + "External id": 993012,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345942173720.164, "dur": 30.724, + "args": { + "External id": 993013,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942173773.662, "dur": 20.720, + "args": { + "External id": 993014,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942173808.076, "dur": 16.620, + "args": { + "External id": 993015,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942173835.522, "dur": 35.735, + "args": { + "External id": 993016,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942173874.993, "dur": 33.540, + "args": { + "External id": 993017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345942173936.577, "dur": 317.867, + "args": { + "External id": 993018,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942174033.600, "dur": 7.622, + "args": { + "External id": 993019,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942174043.705, "dur": 3.011, + "args": { + "External id": 993020,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942174047.942, "dur": 2.951, + "args": { + "External id": 993021,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942174051.954, "dur": 38.267, + "args": { + "External id": 993022,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345942174142.618, "dur": 5.599, + "args": { + "External id": 993023,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942174145.259, "dur": 2.712, + "args": { + "External id": 993024,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345942174150.524, "dur": 33.466, + "args": { + "External id": 993025,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942174156.166, "dur": 1.884, + "args": { + "External id": 993026,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345942174185.970, "dur": 2.093, + "args": { + "External id": 993027,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942174187.200, "dur": 0.781, + "args": { + "External id": 993028,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345942174189.497, "dur": 17.314, + "args": { + "External id": 993029,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942174191.557, "dur": 0.516, + "args": { + "External id": 993030,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345942174294.771, "dur": 28.302, + "args": { + "External id": 993031,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942174341.710, "dur": 19.288, + "args": { + "External id": 993032,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942174371.044, "dur": 48.792, + "args": { + "External id": 993033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942174427.371, "dur": 40.495, + "args": { + "External id": 993034,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942174476.249, "dur": 22.720, + "args": { + "External id": 993035,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942174507.471, "dur": 35.865, + "args": { + "External id": 993036,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942174551.181, "dur": 28.231, + "args": { + "External id": 993037,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942174586.913, "dur": 32.922, + "args": { + "External id": 993038,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345942174637.313, "dur": 38.568, + "args": { + "External id": 993039,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942174699.499, "dur": 25.272, + "args": { + "External id": 993040,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942174739.413, "dur": 20.473, + "args": { + "External id": 993041,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942174774.638, "dur": 13.686, + "args": { + "External id": 993042,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345942174800.148, "dur": 18.370, + "args": { + "External id": 993043,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174895.940, "dur": 16.559, + "args": { + "External id": 993044,"Record function id": 0, "Ev Idx": 11027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174899.978, "dur": 11.415, + "args": { + "External id": 993045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174904.587, "dur": 6.025, + "args": { + "External id": 993046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174906.657, "dur": 3.845, + "args": { + "External id": 993047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174916.306, "dur": 5.947, + "args": { + "External id": 993048,"Record function id": 0, "Ev Idx": 11031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174918.168, "dur": 3.567, + "args": { + "External id": 993049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174918.926, "dur": 2.196, + "args": { + "External id": 993050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174919.815, "dur": 1.158, + "args": { + "External id": 993051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174925.655, "dur": 5.138, + "args": { + "External id": 993052,"Record function id": 0, "Ev Idx": 11035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174927.311, "dur": 2.968, + "args": { + "External id": 993053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174927.773, "dur": 2.100, + "args": { + "External id": 993054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174928.634, "dur": 1.158, + "args": { + "External id": 993055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174933.919, "dur": 5.214, + "args": { + "External id": 993056,"Record function id": 0, "Ev Idx": 11039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174935.299, "dur": 3.362, + "args": { + "External id": 993057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174935.902, "dur": 2.331, + "args": { + "External id": 993058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174936.991, "dur": 1.136, + "args": { + "External id": 993059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174942.290, "dur": 4.623, + "args": { + "External id": 993060,"Record function id": 0, "Ev Idx": 11043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174943.671, "dur": 2.822, + "args": { + "External id": 993061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174944.326, "dur": 1.695, + "args": { + "External id": 993062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174944.982, "dur": 0.957, + "args": { + "External id": 993063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174950.026, "dur": 4.405, + "args": { + "External id": 993064,"Record function id": 0, "Ev Idx": 11047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174951.753, "dur": 2.235, + "args": { + "External id": 993065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174952.233, "dur": 1.298, + "args": { + "External id": 993066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174952.774, "dur": 0.685, + "args": { + "External id": 993067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174957.816, "dur": 5.724, + "args": { + "External id": 993068,"Record function id": 0, "Ev Idx": 11051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174959.105, "dur": 4.026, + "args": { + "External id": 993069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174959.609, "dur": 3.087, + "args": { + "External id": 993070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174960.081, "dur": 2.545, + "args": { + "External id": 993071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174967.077, "dur": 4.264, + "args": { + "External id": 993072,"Record function id": 0, "Ev Idx": 11055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174968.121, "dur": 2.788, + "args": { + "External id": 993073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174968.800, "dur": 1.424, + "args": { + "External id": 993074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174969.070, "dur": 1.080, + "args": { + "External id": 993075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174974.802, "dur": 3.274, + "args": { + "External id": 993076,"Record function id": 0, "Ev Idx": 11059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942174975.694, "dur": 1.953, + "args": { + "External id": 993077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174976.137, "dur": 0.946, + "args": { + "External id": 993078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942174976.438, "dur": 0.569, + "args": { + "External id": 993079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942174982.235, "dur": 61102.435, + "args": { + "External id": 993080,"Record function id": 0, "Sequence number": 10552480, "Fwd thread id": 1, "Ev Idx": 11063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942174983.729, "dur": 61059.900, + "args": { + "External id": 993081,"Sequence number": 10552480, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11064 + } + }, + { + "ph": "f", "id": 439, "pid": 2338706, "tid": 2379422, "ts": 6345942174983.729, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 2338706, "tid": 2379422, + "ts": 6345942175031.985, "dur": 74.093, + "args": { + "External id": 993082,"Record function id": 0, "Ev Idx": 11065 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 2338706, "tid": 2379422, + "ts": 6345942175115.216, "dur": 70.337, + "args": { + "External id": 993083,"Record function id": 0, "Ev Idx": 11066 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 2338706, "tid": 2379422, + "ts": 6345942175191.767, "dur": 60842.537, + "args": { + "External id": 993084,"Record function id": 0, "Ev Idx": 11067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942175287.870, "dur": 7.666, + "args": { + "External id": 993085,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942175306.320, "dur": 5.064, + "args": { + "External id": 993086,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345942175326.227, "dur": 59796.556, + "args": { + "External id": 993087,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345942175340.363, "dur": 59768.937, + "args": { + "External id": 993088,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942175449.258, "dur": 18.577, + "args": { + "External id": 993089,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345942175486.975, "dur": 59552.448, + "args": { + "External id": 993090,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942175489.683, "dur": 59548.659, + "args": { + "External id": 993091,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942175494.739, "dur": 8.939, + "args": { + "External id": 993092,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942175508.088, "dur": 59525.237, + "args": { + "External id": 993093,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942235237.586, "dur": 11.938, + "args": { + "External id": 993094,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942235241.067, "dur": 8.099, + "args": { + "External id": 993095,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345942235282.153, "dur": 365.145, + "args": { + "External id": 993096,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942235312.295, "dur": 330.070, + "args": { + "External id": 993097,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11080, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345942235325.875, "dur": 311.387, + "args": { + "External id": 993098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942235666.205, "dur": 2.265, + "args": { + "External id": 993099,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11082, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942235726.610, "dur": 6.565, + "args": { + "External id": 993100,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942235745.973, "dur": 35.005, + "args": { + "External id": 993101,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942235791.621, "dur": 1.464, + "args": { + "External id": 993102,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942235798.395, "dur": 14.895, + "args": { + "External id": 993103,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942235818.799, "dur": 3.116, + "args": { + "External id": 993104,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942235826.839, "dur": 11.904, + "args": { + "External id": 993105,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942235844.345, "dur": 0.844, + "args": { + "External id": 993106,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942235849.558, "dur": 11.753, + "args": { + "External id": 993107,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942235865.863, "dur": 0.719, + "args": { + "External id": 993108,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942235870.553, "dur": 9.571, + "args": { + "External id": 993109,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942235884.479, "dur": 1.129, + "args": { + "External id": 993110,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942235890.000, "dur": 10.490, + "args": { + "External id": 993111,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942235904.390, "dur": 0.878, + "args": { + "External id": 993112,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942235909.460, "dur": 9.760, + "args": { + "External id": 993113,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942235923.529, "dur": 1.063, + "args": { + "External id": 993114,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942235928.495, "dur": 10.107, + "args": { + "External id": 993115,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942235942.594, "dur": 1.157, + "args": { + "External id": 993116,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942235947.586, "dur": 10.267, + "args": { + "External id": 993117,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942236103.347, "dur": 2858.692, + "args": { + "External id": 993118,"Record function id": 0, "Ev Idx": 11101 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 2338706, "tid": 2379422, + "ts": 6345942236128.480, "dur": 1057.635, + "args": { + "External id": 993119,"Record function id": 0, "Ev Idx": 11102 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338706, "tid": 2379422, + "ts": 6345942236146.340, "dur": 326.500, + "args": { + "External id": 993120,"Record function id": 0, "Ev Idx": 11103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942236236.298, "dur": 4.806, + "args": { + "External id": 993121,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942236244.319, "dur": 3.406, + "args": { + "External id": 993122,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942236249.642, "dur": 0.999, + "args": { + "External id": 993123,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942236252.472, "dur": 0.918, + "args": { + "External id": 993124,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942236257.074, "dur": 0.920, + "args": { + "External id": 993125,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942236259.643, "dur": 0.870, + "args": { + "External id": 993126,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942236262.111, "dur": 0.648, + "args": { + "External id": 993127,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942236264.209, "dur": 1.923, + "args": { + "External id": 993128,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942236269.772, "dur": 0.681, + "args": { + "External id": 993129,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942236271.855, "dur": 2.756, + "args": { + "External id": 993130,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942236292.738, "dur": 148.717, + "args": { + "External id": 993131,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942236310.023, "dur": 127.308, + "args": { + "External id": 993132,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942236325.851, "dur": 14.984, + "args": { + "External id": 993133,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345942236344.627, "dur": 65.633, + "args": { + "External id": 993134,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942236347.448, "dur": 62.512, + "args": { + "External id": 993135,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236351.587, "dur": 5.301, + "args": { + "External id": 993136,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942236359.250, "dur": 50.162, + "args": { + "External id": 993137,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11120 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 2338706, "tid": 2379422, + "ts": 6345942236556.845, "dur": 621.207, + "args": { + "External id": 993138,"Record function id": 0, "Ev Idx": 11121 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338706, "tid": 2379422, + "ts": 6345942236574.655, "dur": 589.372, + "args": { + "External id": 993139,"Record function id": 0, "Ev Idx": 11122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942236635.207, "dur": 5.313, + "args": { + "External id": 993140,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345942236656.219, "dur": 29.109, + "args": { + "External id": 993141,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236661.170, "dur": 1.975, + "args": { + "External id": 993142,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236665.331, "dur": 0.536, + "args": { + "External id": 993143,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236667.648, "dur": 0.327, + "args": { + "External id": 993144,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236669.181, "dur": 0.530, + "args": { + "External id": 993145,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236671.294, "dur": 2.520, + "args": { + "External id": 993146,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236675.631, "dur": 0.460, + "args": { + "External id": 993147,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236677.343, "dur": 0.671, + "args": { + "External id": 993148,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236679.443, "dur": 0.432, + "args": { + "External id": 993149,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236681.681, "dur": 0.320, + "args": { + "External id": 993150,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942236697.264, "dur": 38.529, + "args": { + "External id": 993151,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345942236765.541, "dur": 108.128, + "args": { + "External id": 993152,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 11135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942236775.019, "dur": 3.303, + "args": { + "External id": 993153,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345942236783.952, "dur": 10.450, + "args": { + "External id": 993154,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345942236788.216, "dur": 5.691, + "args": { + "External id": 993155,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 11138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236792.039, "dur": 0.706, + "args": { + "External id": 993156,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345942236801.076, "dur": 23.332, + "args": { + "External id": 993157,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236803.394, "dur": 0.627, + "args": { + "External id": 993158,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236805.418, "dur": 2.531, + "args": { + "External id": 993159,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236809.715, "dur": 0.463, + "args": { + "External id": 993160,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236811.861, "dur": 0.580, + "args": { + "External id": 993161,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236813.682, "dur": 0.727, + "args": { + "External id": 993162,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236816.222, "dur": 0.359, + "args": { + "External id": 993163,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236818.084, "dur": 0.546, + "args": { + "External id": 993164,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236819.553, "dur": 0.429, + "args": { + "External id": 993165,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942236821.545, "dur": 0.377, + "args": { + "External id": 993166,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942236834.396, "dur": 29.182, + "args": { + "External id": 993167,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345942236914.915, "dur": 136.580, + "args": { + "External id": 993168,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 11151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942236939.806, "dur": 107.670, + "args": { + "External id": 993169,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11152, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345942236949.548, "dur": 93.141, + "args": { + "External id": 993170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 11153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942237102.853, "dur": 3.023, + "args": { + "External id": 993171,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11154, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942237194.066, "dur": 1746.237, + "args": { + "External id": 993172,"Sequence number": 10552479, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11155 + } + }, + { + "ph": "f", "id": 440, "pid": 2338706, "tid": 2379422, "ts": 6345942237194.066, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942237307.294, "dur": 108.328, + "args": { + "External id": 993173,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345942237474.875, "dur": 41.277, + "args": { + "External id": 993174,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345942237536.703, "dur": 50.601, + "args": { + "External id": 993175,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942237597.406, "dur": 31.532, + "args": { + "External id": 993176,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942237635.631, "dur": 32.133, + "args": { + "External id": 993177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942237674.580, "dur": 28.222, + "args": { + "External id": 993178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942237711.571, "dur": 29.394, + "args": { + "External id": 993179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345942237768.242, "dur": 23.835, + "args": { + "External id": 993180,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345942237812.231, "dur": 28.890, + "args": { + "External id": 993181,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942237864.764, "dur": 18.917, + "args": { + "External id": 993182,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942237897.996, "dur": 16.029, + "args": { + "External id": 993183,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942237924.672, "dur": 35.942, + "args": { + "External id": 993184,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942237964.471, "dur": 33.786, + "args": { + "External id": 993185,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345942238048.405, "dur": 304.460, + "args": { + "External id": 993186,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942238167.834, "dur": 7.423, + "args": { + "External id": 993187,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942238181.758, "dur": 2.976, + "args": { + "External id": 993188,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942238186.294, "dur": 2.693, + "args": { + "External id": 993189,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942238190.384, "dur": 4.789, + "args": { + "External id": 993190,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345942238245.152, "dur": 5.526, + "args": { + "External id": 993191,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942238247.660, "dur": 2.839, + "args": { + "External id": 993192,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345942238252.733, "dur": 34.094, + "args": { + "External id": 993193,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942238258.797, "dur": 1.879, + "args": { + "External id": 993194,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345942238288.583, "dur": 2.290, + "args": { + "External id": 993195,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942238290.151, "dur": 0.636, + "args": { + "External id": 993196,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345942238292.314, "dur": 16.866, + "args": { + "External id": 993197,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942238294.491, "dur": 0.625, + "args": { + "External id": 993198,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345942238391.427, "dur": 28.005, + "args": { + "External id": 993199,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942238435.875, "dur": 18.222, + "args": { + "External id": 993200,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942238463.509, "dur": 48.525, + "args": { + "External id": 993201,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942238519.932, "dur": 40.644, + "args": { + "External id": 993202,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942238569.566, "dur": 22.421, + "args": { + "External id": 993203,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942238600.210, "dur": 31.957, + "args": { + "External id": 993204,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942238640.014, "dur": 27.991, + "args": { + "External id": 993205,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942238688.231, "dur": 42.359, + "args": { + "External id": 993206,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345942238751.692, "dur": 23.248, + "args": { + "External id": 993207,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942238791.679, "dur": 25.698, + "args": { + "External id": 993208,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942238830.559, "dur": 20.485, + "args": { + "External id": 993209,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942238865.147, "dur": 14.174, + "args": { + "External id": 993210,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345942238894.892, "dur": 15.256, + "args": { + "External id": 993211,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942238985.477, "dur": 15.670, + "args": { + "External id": 993212,"Record function id": 0, "Ev Idx": 11195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942238988.866, "dur": 11.312, + "args": { + "External id": 993213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942238993.482, "dur": 5.901, + "args": { + "External id": 993214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942238995.143, "dur": 4.124, + "args": { + "External id": 993215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942239005.299, "dur": 26.302, + "args": { + "External id": 993216,"Record function id": 0, "Ev Idx": 11199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942239007.000, "dur": 23.559, + "args": { + "External id": 993217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942239025.291, "dur": 3.291, + "args": { + "External id": 993218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942239026.560, "dur": 1.735, + "args": { + "External id": 993219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942239037.053, "dur": 6.090, + "args": { + "External id": 993220,"Record function id": 0, "Ev Idx": 11203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942239038.886, "dur": 3.816, + "args": { + "External id": 993221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942239039.702, "dur": 2.400, + "args": { + "External id": 993222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942239040.266, "dur": 1.724, + "args": { + "External id": 993223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942239046.414, "dur": 3.636, + "args": { + "External id": 993224,"Record function id": 0, "Ev Idx": 11207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942239047.396, "dur": 2.219, + "args": { + "External id": 993225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942239048.032, "dur": 1.183, + "args": { + "External id": 993226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942239048.455, "dur": 0.687, + "args": { + "External id": 993227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942239088.435, "dur": 7.134, + "args": { + "External id": 993228,"Record function id": 0, "Ev Idx": 11211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942239090.536, "dur": 4.347, + "args": { + "External id": 993229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942239091.606, "dur": 2.100, + "args": { + "External id": 993230,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942239092.089, "dur": 1.410, + "args": { + "External id": 993231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942239099.314, "dur": 6.778, + "args": { + "External id": 993232,"Record function id": 0, "Ev Idx": 11215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942239100.928, "dur": 4.758, + "args": { + "External id": 993233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942239101.382, "dur": 3.635, + "args": { + "External id": 993234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942239101.938, "dur": 3.012, + "args": { + "External id": 993235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942239109.292, "dur": 4.812, + "args": { + "External id": 993236,"Record function id": 0, "Ev Idx": 11219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942239110.699, "dur": 2.993, + "args": { + "External id": 993237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942239111.315, "dur": 1.612, + "args": { + "External id": 993238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942239111.611, "dur": 1.251, + "args": { + "External id": 993239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942239117.604, "dur": 4.832, + "args": { + "External id": 993240,"Record function id": 0, "Ev Idx": 11223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942239119.156, "dur": 2.812, + "args": { + "External id": 993241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942239119.701, "dur": 1.548, + "args": { + "External id": 993242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942239120.537, "dur": 0.604, + "args": { + "External id": 993243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942239125.776, "dur": 3.988, + "args": { + "External id": 993244,"Record function id": 0, "Ev Idx": 11227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942239127.036, "dur": 2.298, + "args": { + "External id": 993245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942239127.563, "dur": 1.135, + "args": { + "External id": 993246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942239128.035, "dur": 0.600, + "args": { + "External id": 993247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942239133.714, "dur": 61087.345, + "args": { + "External id": 993248,"Record function id": 0, "Sequence number": 10552478, "Fwd thread id": 1, "Ev Idx": 11231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942239135.275, "dur": 61076.441, + "args": { + "External id": 993249,"Sequence number": 10552478, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11232 + } + }, + { + "ph": "f", "id": 441, "pid": 2338706, "tid": 2379422, "ts": 6345942239135.275, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 2338706, "tid": 2379422, + "ts": 6345942239170.029, "dur": 39.991, + "args": { + "External id": 993250,"Record function id": 0, "Ev Idx": 11233 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 2338706, "tid": 2379422, + "ts": 6345942239218.462, "dur": 72.774, + "args": { + "External id": 993251,"Record function id": 0, "Ev Idx": 11234 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 2338706, "tid": 2379422, + "ts": 6345942239297.623, "dur": 60904.704, + "args": { + "External id": 993252,"Record function id": 0, "Ev Idx": 11235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942239390.476, "dur": 7.492, + "args": { + "External id": 993253,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942239407.890, "dur": 4.899, + "args": { + "External id": 993254,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345942239427.310, "dur": 59710.525, + "args": { + "External id": 993255,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345942239441.795, "dur": 59682.479, + "args": { + "External id": 993256,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942239539.098, "dur": 19.088, + "args": { + "External id": 993257,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345942239577.163, "dur": 59471.435, + "args": { + "External id": 993258,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942239580.026, "dur": 59467.499, + "args": { + "External id": 993259,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942239584.557, "dur": 9.060, + "args": { + "External id": 993260,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942239595.893, "dur": 59446.671, + "args": { + "External id": 993261,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942299251.934, "dur": 12.171, + "args": { + "External id": 993262,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942299255.424, "dur": 8.256, + "args": { + "External id": 993263,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345942299298.662, "dur": 394.227, + "args": { + "External id": 993264,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942299326.700, "dur": 360.571, + "args": { + "External id": 993265,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11248, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345942299338.077, "dur": 343.504, + "args": { + "External id": 993266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942299713.333, "dur": 2.392, + "args": { + "External id": 993267,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11250, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942299776.183, "dur": 6.639, + "args": { + "External id": 993268,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942299795.327, "dur": 34.256, + "args": { + "External id": 993269,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942299839.815, "dur": 1.536, + "args": { + "External id": 993270,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942299847.248, "dur": 12.000, + "args": { + "External id": 993271,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942299864.847, "dur": 3.389, + "args": { + "External id": 993272,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942299872.814, "dur": 10.187, + "args": { + "External id": 993273,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942299913.339, "dur": 1.289, + "args": { + "External id": 993274,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942299919.383, "dur": 11.720, + "args": { + "External id": 993275,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942299936.094, "dur": 0.909, + "args": { + "External id": 993276,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942299940.928, "dur": 11.688, + "args": { + "External id": 993277,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942299956.963, "dur": 1.043, + "args": { + "External id": 993278,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942299962.064, "dur": 12.062, + "args": { + "External id": 993279,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942299979.372, "dur": 0.864, + "args": { + "External id": 993280,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942300032.085, "dur": 16.490, + "args": { + "External id": 993281,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300093.294, "dur": 1.903, + "args": { + "External id": 993282,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942300101.188, "dur": 16.137, + "args": { + "External id": 993283,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300121.972, "dur": 0.847, + "args": { + "External id": 993284,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942300126.923, "dur": 11.302, + "args": { + "External id": 993285,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942300238.719, "dur": 2871.362, + "args": { + "External id": 993286,"Record function id": 0, "Ev Idx": 11269 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 2338706, "tid": 2379422, + "ts": 6345942300261.112, "dur": 1063.536, + "args": { + "External id": 993287,"Record function id": 0, "Ev Idx": 11270 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338706, "tid": 2379422, + "ts": 6345942300277.910, "dur": 319.022, + "args": { + "External id": 993288,"Record function id": 0, "Ev Idx": 11271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942300370.110, "dur": 4.062, + "args": { + "External id": 993289,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942300377.261, "dur": 2.931, + "args": { + "External id": 993290,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942300381.986, "dur": 1.031, + "args": { + "External id": 993291,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942300385.918, "dur": 0.781, + "args": { + "External id": 993292,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942300388.209, "dur": 0.964, + "args": { + "External id": 993293,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942300390.770, "dur": 0.787, + "args": { + "External id": 993294,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942300393.414, "dur": 0.621, + "args": { + "External id": 993295,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942300396.632, "dur": 1.923, + "args": { + "External id": 993296,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942300399.770, "dur": 0.735, + "args": { + "External id": 993297,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942300401.999, "dur": 2.389, + "args": { + "External id": 993298,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942300422.744, "dur": 145.405, + "args": { + "External id": 993299,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942300440.003, "dur": 123.540, + "args": { + "External id": 993300,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942300455.619, "dur": 14.013, + "args": { + "External id": 993301,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345942300474.840, "dur": 62.596, + "args": { + "External id": 993302,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942300477.536, "dur": 59.540, + "args": { + "External id": 993303,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300481.384, "dur": 6.376, + "args": { + "External id": 993304,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942300489.335, "dur": 47.126, + "args": { + "External id": 993305,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11288 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 2338706, "tid": 2379422, + "ts": 6345942300683.198, "dur": 634.034, + "args": { + "External id": 993306,"Record function id": 0, "Ev Idx": 11289 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338706, "tid": 2379422, + "ts": 6345942300698.986, "dur": 605.043, + "args": { + "External id": 993307,"Record function id": 0, "Ev Idx": 11290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942300759.359, "dur": 4.459, + "args": { + "External id": 993308,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345942300779.350, "dur": 27.445, + "args": { + "External id": 993309,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300784.635, "dur": 1.556, + "args": { + "External id": 993310,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300787.830, "dur": 0.685, + "args": { + "External id": 993311,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300790.219, "dur": 0.455, + "args": { + "External id": 993312,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300792.304, "dur": 0.342, + "args": { + "External id": 993313,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300793.588, "dur": 2.235, + "args": { + "External id": 993314,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300797.528, "dur": 0.396, + "args": { + "External id": 993315,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300799.489, "dur": 0.303, + "args": { + "External id": 993316,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300801.088, "dur": 0.407, + "args": { + "External id": 993317,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300802.921, "dur": 0.334, + "args": { + "External id": 993318,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942300818.054, "dur": 39.276, + "args": { + "External id": 993319,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345942300886.417, "dur": 101.844, + "args": { + "External id": 993320,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 11303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942300896.211, "dur": 2.962, + "args": { + "External id": 993321,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345942300904.187, "dur": 9.833, + "args": { + "External id": 993322,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345942300908.457, "dur": 5.136, + "args": { + "External id": 993323,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 11306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300911.831, "dur": 0.681, + "args": { + "External id": 993324,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345942300919.957, "dur": 20.126, + "args": { + "External id": 993325,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300921.771, "dur": 0.402, + "args": { + "External id": 993326,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300923.310, "dur": 2.387, + "args": { + "External id": 993327,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300927.131, "dur": 0.344, + "args": { + "External id": 993328,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300928.974, "dur": 0.367, + "args": { + "External id": 993329,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300930.594, "dur": 0.297, + "args": { + "External id": 993330,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300932.377, "dur": 0.389, + "args": { + "External id": 993331,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300934.427, "dur": 0.328, + "args": { + "External id": 993332,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300936.126, "dur": 0.323, + "args": { + "External id": 993333,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942300937.476, "dur": 0.252, + "args": { + "External id": 993334,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942300950.525, "dur": 28.744, + "args": { + "External id": 993335,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345942301105.637, "dur": 121.395, + "args": { + "External id": 993336,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 11319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942301132.985, "dur": 90.618, + "args": { + "External id": 993337,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11320, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345942301143.751, "dur": 75.427, + "args": { + "External id": 993338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 11321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942301242.201, "dur": 1.889, + "args": { + "External id": 993339,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11322, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942301332.052, "dur": 1719.879, + "args": { + "External id": 993340,"Sequence number": 10552477, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11323 + } + }, + { + "ph": "f", "id": 442, "pid": 2338706, "tid": 2379422, "ts": 6345942301332.052, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942301445.268, "dur": 102.673, + "args": { + "External id": 993341,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345942301590.360, "dur": 42.212, + "args": { + "External id": 993342,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345942301652.480, "dur": 48.433, + "args": { + "External id": 993343,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942301711.155, "dur": 31.355, + "args": { + "External id": 993344,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942301749.007, "dur": 32.399, + "args": { + "External id": 993345,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942301788.163, "dur": 27.186, + "args": { + "External id": 993346,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942301824.601, "dur": 29.167, + "args": { + "External id": 993347,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345942301880.469, "dur": 22.052, + "args": { + "External id": 993348,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345942301922.367, "dur": 28.451, + "args": { + "External id": 993349,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942301972.738, "dur": 18.785, + "args": { + "External id": 993350,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942302004.392, "dur": 37.215, + "args": { + "External id": 993351,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942302089.444, "dur": 48.002, + "args": { + "External id": 993352,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942302142.672, "dur": 33.835, + "args": { + "External id": 993353,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345942302206.623, "dur": 250.005, + "args": { + "External id": 993354,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942302288.842, "dur": 6.735, + "args": { + "External id": 993355,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942302297.547, "dur": 3.049, + "args": { + "External id": 993356,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942302301.786, "dur": 2.324, + "args": { + "External id": 993357,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942302305.395, "dur": 4.739, + "args": { + "External id": 993358,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345942302355.419, "dur": 5.231, + "args": { + "External id": 993359,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942302357.780, "dur": 2.671, + "args": { + "External id": 993360,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345942302362.296, "dur": 31.901, + "args": { + "External id": 993361,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942302367.755, "dur": 1.625, + "args": { + "External id": 993362,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345942302395.535, "dur": 2.462, + "args": { + "External id": 993363,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942302396.951, "dur": 0.960, + "args": { + "External id": 993364,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345942302399.305, "dur": 15.519, + "args": { + "External id": 993365,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942302400.959, "dur": 0.530, + "args": { + "External id": 993366,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345942302495.147, "dur": 28.360, + "args": { + "External id": 993367,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942302540.526, "dur": 19.285, + "args": { + "External id": 993368,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942302568.739, "dur": 41.941, + "args": { + "External id": 993369,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942302617.427, "dur": 38.787, + "args": { + "External id": 993370,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942302664.363, "dur": 21.148, + "args": { + "External id": 993371,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942302693.423, "dur": 31.801, + "args": { + "External id": 993372,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942302744.147, "dur": 40.293, + "args": { + "External id": 993373,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942302795.347, "dur": 30.313, + "args": { + "External id": 993374,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345942302848.794, "dur": 23.714, + "args": { + "External id": 993375,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942302887.656, "dur": 27.075, + "args": { + "External id": 993376,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942302928.163, "dur": 16.422, + "args": { + "External id": 993377,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942302958.900, "dur": 13.686, + "args": { + "External id": 993378,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345942302984.627, "dur": 15.308, + "args": { + "External id": 993379,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303134.964, "dur": 16.080, + "args": { + "External id": 993380,"Record function id": 0, "Ev Idx": 11363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303138.618, "dur": 11.499, + "args": { + "External id": 993381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303143.382, "dur": 5.720, + "args": { + "External id": 993382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303144.891, "dur": 4.062, + "args": { + "External id": 993383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303155.009, "dur": 5.889, + "args": { + "External id": 993384,"Record function id": 0, "Ev Idx": 11367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303156.411, "dur": 3.970, + "args": { + "External id": 993385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303157.277, "dur": 2.627, + "args": { + "External id": 993386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303158.462, "dur": 1.320, + "args": { + "External id": 993387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303164.000, "dur": 5.812, + "args": { + "External id": 993388,"Record function id": 0, "Ev Idx": 11371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303165.591, "dur": 3.782, + "args": { + "External id": 993389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303166.649, "dur": 2.178, + "args": { + "External id": 993390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303167.569, "dur": 1.167, + "args": { + "External id": 993391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303172.931, "dur": 4.240, + "args": { + "External id": 993392,"Record function id": 0, "Ev Idx": 11375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303173.984, "dur": 2.763, + "args": { + "External id": 993393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303174.636, "dur": 1.691, + "args": { + "External id": 993394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303175.195, "dur": 1.067, + "args": { + "External id": 993395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303190.342, "dur": 4.263, + "args": { + "External id": 993396,"Record function id": 0, "Ev Idx": 11379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303191.479, "dur": 2.700, + "args": { + "External id": 993397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303192.151, "dur": 1.408, + "args": { + "External id": 993398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303192.444, "dur": 1.052, + "args": { + "External id": 993399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303197.718, "dur": 6.547, + "args": { + "External id": 993400,"Record function id": 0, "Ev Idx": 11383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303198.701, "dur": 5.121, + "args": { + "External id": 993401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303199.515, "dur": 3.718, + "args": { + "External id": 993402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303200.244, "dur": 2.928, + "args": { + "External id": 993403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303208.050, "dur": 5.152, + "args": { + "External id": 993404,"Record function id": 0, "Ev Idx": 11387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303209.184, "dur": 3.585, + "args": { + "External id": 993405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303210.003, "dur": 1.597, + "args": { + "External id": 993406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303210.330, "dur": 1.197, + "args": { + "External id": 993407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303216.729, "dur": 3.878, + "args": { + "External id": 993408,"Record function id": 0, "Ev Idx": 11391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303217.769, "dur": 2.436, + "args": { + "External id": 993409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303218.446, "dur": 1.308, + "args": { + "External id": 993410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303218.760, "dur": 0.930, + "args": { + "External id": 993411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303224.065, "dur": 4.100, + "args": { + "External id": 993412,"Record function id": 0, "Ev Idx": 11395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942303225.069, "dur": 2.666, + "args": { + "External id": 993413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303225.795, "dur": 1.039, + "args": { + "External id": 993414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942303226.100, "dur": 0.658, + "args": { + "External id": 993415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942303232.531, "dur": 61620.954, + "args": { + "External id": 993416,"Record function id": 0, "Sequence number": 10552476, "Fwd thread id": 1, "Ev Idx": 11399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942303234.116, "dur": 61609.584, + "args": { + "External id": 993417,"Sequence number": 10552476, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11400 + } + }, + { + "ph": "f", "id": 443, "pid": 2338706, "tid": 2379422, "ts": 6345942303234.116, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 2338706, "tid": 2379422, + "ts": 6345942303263.993, "dur": 42.935, + "args": { + "External id": 993418,"Record function id": 0, "Ev Idx": 11401 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 2338706, "tid": 2379422, + "ts": 6345942303314.527, "dur": 69.902, + "args": { + "External id": 993419,"Record function id": 0, "Ev Idx": 11402 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 2338706, "tid": 2379422, + "ts": 6345942303390.232, "dur": 61445.261, + "args": { + "External id": 993420,"Record function id": 0, "Ev Idx": 11403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942303481.558, "dur": 7.347, + "args": { + "External id": 993421,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942303498.355, "dur": 4.701, + "args": { + "External id": 993422,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345942303517.392, "dur": 60363.315, + "args": { + "External id": 993423,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345942303531.383, "dur": 60335.865, + "args": { + "External id": 993424,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942303697.716, "dur": 18.276, + "args": { + "External id": 993425,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345942303736.817, "dur": 60083.763, + "args": { + "External id": 993426,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942303739.922, "dur": 60079.610, + "args": { + "External id": 993427,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942303744.763, "dur": 9.088, + "args": { + "External id": 993428,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942303756.125, "dur": 60058.285, + "args": { + "External id": 993429,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942363991.950, "dur": 12.808, + "args": { + "External id": 993430,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942363995.520, "dur": 8.838, + "args": { + "External id": 993431,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345942364049.536, "dur": 398.900, + "args": { + "External id": 993432,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942364107.854, "dur": 335.527, + "args": { + "External id": 993433,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11416, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345942364121.726, "dur": 316.663, + "args": { + "External id": 993434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942364468.041, "dur": 2.249, + "args": { + "External id": 993435,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11418, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942364535.519, "dur": 7.150, + "args": { + "External id": 993436,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942364555.716, "dur": 36.259, + "args": { + "External id": 993437,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942364602.445, "dur": 3.485, + "args": { + "External id": 993438,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942364611.444, "dur": 14.188, + "args": { + "External id": 993439,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942364631.365, "dur": 0.784, + "args": { + "External id": 993440,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942364636.379, "dur": 11.577, + "args": { + "External id": 993441,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942364653.375, "dur": 0.720, + "args": { + "External id": 993442,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942364658.454, "dur": 13.272, + "args": { + "External id": 993443,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942364676.347, "dur": 0.692, + "args": { + "External id": 993444,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942364680.999, "dur": 11.293, + "args": { + "External id": 993445,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942364696.575, "dur": 0.837, + "args": { + "External id": 993446,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942364701.288, "dur": 13.069, + "args": { + "External id": 993447,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942364720.152, "dur": 0.693, + "args": { + "External id": 993448,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942364725.165, "dur": 10.788, + "args": { + "External id": 993449,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942364740.334, "dur": 0.615, + "args": { + "External id": 993450,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942364745.126, "dur": 12.022, + "args": { + "External id": 993451,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942364761.178, "dur": 0.650, + "args": { + "External id": 993452,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942364765.889, "dur": 10.977, + "args": { + "External id": 993453,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942364868.417, "dur": 2944.609, + "args": { + "External id": 993454,"Record function id": 0, "Ev Idx": 11437 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 2338706, "tid": 2379422, + "ts": 6345942364889.172, "dur": 1101.449, + "args": { + "External id": 993455,"Record function id": 0, "Ev Idx": 11438 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338706, "tid": 2379422, + "ts": 6345942364903.673, "dur": 392.697, + "args": { + "External id": 993456,"Record function id": 0, "Ev Idx": 11439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942364987.639, "dur": 6.366, + "args": { + "External id": 993457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942364997.326, "dur": 0.940, + "args": { + "External id": 993458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942365001.956, "dur": 1.128, + "args": { + "External id": 993459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942365004.974, "dur": 0.675, + "args": { + "External id": 993460,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942365024.052, "dur": 1.696, + "args": { + "External id": 993461,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942365029.293, "dur": 0.980, + "args": { + "External id": 993462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942365032.894, "dur": 1.014, + "args": { + "External id": 993463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942365035.416, "dur": 1.908, + "args": { + "External id": 993464,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942365038.837, "dur": 3.018, + "args": { + "External id": 993465,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942365043.707, "dur": 0.820, + "args": { + "External id": 993466,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942365106.939, "dur": 152.875, + "args": { + "External id": 993467,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942365124.738, "dur": 130.134, + "args": { + "External id": 993468,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942365140.507, "dur": 17.163, + "args": { + "External id": 993469,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345942365161.647, "dur": 65.954, + "args": { + "External id": 993470,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942365164.470, "dur": 62.800, + "args": { + "External id": 993471,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365168.452, "dur": 5.495, + "args": { + "External id": 993472,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942365175.596, "dur": 50.826, + "args": { + "External id": 993473,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11456 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 2338706, "tid": 2379422, + "ts": 6345942365395.305, "dur": 587.909, + "args": { + "External id": 993474,"Record function id": 0, "Ev Idx": 11457 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338706, "tid": 2379422, + "ts": 6345942365415.682, "dur": 554.216, + "args": { + "External id": 993475,"Record function id": 0, "Ev Idx": 11458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942365482.559, "dur": 4.677, + "args": { + "External id": 993476,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345942365503.161, "dur": 27.264, + "args": { + "External id": 993477,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365508.438, "dur": 1.663, + "args": { + "External id": 993478,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365511.726, "dur": 0.418, + "args": { + "External id": 993479,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365513.838, "dur": 0.551, + "args": { + "External id": 993480,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365516.241, "dur": 2.324, + "args": { + "External id": 993481,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365519.715, "dur": 0.357, + "args": { + "External id": 993482,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365521.094, "dur": 0.346, + "args": { + "External id": 993483,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365523.212, "dur": 0.423, + "args": { + "External id": 993484,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365524.475, "dur": 0.382, + "args": { + "External id": 993485,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365526.261, "dur": 0.315, + "args": { + "External id": 993486,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942365540.853, "dur": 40.685, + "args": { + "External id": 993487,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2379422, + "ts": 6345942365611.599, "dur": 128.456, + "args": { + "External id": 993488,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 11471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942365621.165, "dur": 3.095, + "args": { + "External id": 993489,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2379422, + "ts": 6345942365629.525, "dur": 10.163, + "args": { + "External id": 993490,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2379422, + "ts": 6345942365633.894, "dur": 5.375, + "args": { + "External id": 993491,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 11474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365637.525, "dur": 0.502, + "args": { + "External id": 993492,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2379422, + "ts": 6345942365646.124, "dur": 23.958, + "args": { + "External id": 993493,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365648.013, "dur": 2.824, + "args": { + "External id": 993494,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365652.082, "dur": 0.523, + "args": { + "External id": 993495,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365654.402, "dur": 0.464, + "args": { + "External id": 993496,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365656.548, "dur": 0.593, + "args": { + "External id": 993497,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365658.221, "dur": 0.336, + "args": { + "External id": 993498,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365659.749, "dur": 0.264, + "args": { + "External id": 993499,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365661.676, "dur": 0.373, + "args": { + "External id": 993500,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365662.912, "dur": 0.348, + "args": { + "External id": 993501,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942365664.335, "dur": 2.462, + "args": { + "External id": 993502,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942365701.436, "dur": 30.161, + "args": { + "External id": 993503,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345942365788.208, "dur": 114.202, + "args": { + "External id": 993504,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 11487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942365814.561, "dur": 84.309, + "args": { + "External id": 993505,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11488, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2379422, + "ts": 6345942365824.273, "dur": 70.315, + "args": { + "External id": 993506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 11489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942365918.088, "dur": 1.962, + "args": { + "External id": 993507,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11490, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942365997.275, "dur": 1792.473, + "args": { + "External id": 993508,"Sequence number": 10552475, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11491 + } + }, + { + "ph": "f", "id": 444, "pid": 2338706, "tid": 2379422, "ts": 6345942365997.275, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942366166.531, "dur": 107.592, + "args": { + "External id": 993509,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345942366319.590, "dur": 41.984, + "args": { + "External id": 993510,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345942366381.167, "dur": 47.570, + "args": { + "External id": 993511,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942366438.805, "dur": 32.072, + "args": { + "External id": 993512,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942366477.357, "dur": 32.346, + "args": { + "External id": 993513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942366517.090, "dur": 28.384, + "args": { + "External id": 993514,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942366555.047, "dur": 28.484, + "args": { + "External id": 993515,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345942366610.087, "dur": 25.016, + "args": { + "External id": 993516,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345942366652.965, "dur": 30.057, + "args": { + "External id": 993517,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942366704.947, "dur": 20.403, + "args": { + "External id": 993518,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942366738.812, "dur": 17.505, + "args": { + "External id": 993519,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942366766.540, "dur": 37.359, + "args": { + "External id": 993520,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942366807.474, "dur": 34.803, + "args": { + "External id": 993521,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345942366872.004, "dur": 317.022, + "args": { + "External id": 993522,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942366950.963, "dur": 6.504, + "args": { + "External id": 993523,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942366959.219, "dur": 3.327, + "args": { + "External id": 993524,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942366963.812, "dur": 2.211, + "args": { + "External id": 993525,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942366967.042, "dur": 4.601, + "args": { + "External id": 993526,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345942367037.763, "dur": 5.483, + "args": { + "External id": 993527,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942367039.917, "dur": 2.945, + "args": { + "External id": 993528,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345942367044.911, "dur": 71.295, + "args": { + "External id": 993529,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942367050.729, "dur": 35.810, + "args": { + "External id": 993530,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345942367118.641, "dur": 2.225, + "args": { + "External id": 993531,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942367119.693, "dur": 1.063, + "args": { + "External id": 993532,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345942367121.998, "dur": 16.945, + "args": { + "External id": 993533,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942367124.080, "dur": 0.610, + "args": { + "External id": 993534,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345942367228.107, "dur": 28.226, + "args": { + "External id": 993535,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942367273.873, "dur": 18.781, + "args": { + "External id": 993536,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942367301.537, "dur": 49.940, + "args": { + "External id": 993537,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942367358.767, "dur": 40.618, + "args": { + "External id": 993538,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942367410.427, "dur": 21.870, + "args": { + "External id": 993539,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942367438.626, "dur": 32.524, + "args": { + "External id": 993540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942367478.639, "dur": 33.340, + "args": { + "External id": 993541,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942367519.739, "dur": 48.386, + "args": { + "External id": 993542,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345942367593.507, "dur": 27.703, + "args": { + "External id": 993543,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942367636.763, "dur": 28.772, + "args": { + "External id": 993544,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942367679.737, "dur": 18.209, + "args": { + "External id": 993545,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942367715.201, "dur": 14.641, + "args": { + "External id": 993546,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345942367741.455, "dur": 17.293, + "args": { + "External id": 993547,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367835.156, "dur": 15.812, + "args": { + "External id": 993548,"Record function id": 0, "Ev Idx": 11531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367838.956, "dur": 11.068, + "args": { + "External id": 993549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367843.531, "dur": 5.612, + "args": { + "External id": 993550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367845.146, "dur": 3.872, + "args": { + "External id": 993551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367854.888, "dur": 5.385, + "args": { + "External id": 993552,"Record function id": 0, "Ev Idx": 11535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367856.524, "dur": 3.271, + "args": { + "External id": 993553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367857.117, "dur": 2.099, + "args": { + "External id": 993554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367857.847, "dur": 1.228, + "args": { + "External id": 993555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367863.570, "dur": 5.081, + "args": { + "External id": 993556,"Record function id": 0, "Ev Idx": 11539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367865.275, "dur": 2.840, + "args": { + "External id": 993557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367865.940, "dur": 1.677, + "args": { + "External id": 993558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367866.722, "dur": 0.801, + "args": { + "External id": 993559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367871.802, "dur": 4.069, + "args": { + "External id": 993560,"Record function id": 0, "Ev Idx": 11543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367872.922, "dur": 2.532, + "args": { + "External id": 993561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367873.439, "dur": 1.446, + "args": { + "External id": 993562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367874.006, "dur": 0.803, + "args": { + "External id": 993563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367878.925, "dur": 4.138, + "args": { + "External id": 993564,"Record function id": 0, "Ev Idx": 11547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367880.584, "dur": 2.071, + "args": { + "External id": 993565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367881.035, "dur": 1.178, + "args": { + "External id": 993566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367881.452, "dur": 0.697, + "args": { + "External id": 993567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367886.079, "dur": 6.098, + "args": { + "External id": 993568,"Record function id": 0, "Ev Idx": 11551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367887.323, "dur": 4.362, + "args": { + "External id": 993569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367887.819, "dur": 3.424, + "args": { + "External id": 993570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367888.137, "dur": 3.032, + "args": { + "External id": 993571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367895.359, "dur": 4.479, + "args": { + "External id": 993572,"Record function id": 0, "Ev Idx": 11555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367897.137, "dur": 2.284, + "args": { + "External id": 993573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367897.725, "dur": 1.286, + "args": { + "External id": 993574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367898.178, "dur": 0.761, + "args": { + "External id": 993575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367903.186, "dur": 3.960, + "args": { + "External id": 993576,"Record function id": 0, "Ev Idx": 11559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367904.321, "dur": 2.388, + "args": { + "External id": 993577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367904.788, "dur": 1.505, + "args": { + "External id": 993578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367905.461, "dur": 0.708, + "args": { + "External id": 993579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367910.330, "dur": 3.785, + "args": { + "External id": 993580,"Record function id": 0, "Ev Idx": 11563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942367911.487, "dur": 2.170, + "args": { + "External id": 993581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367912.034, "dur": 0.957, + "args": { + "External id": 993582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942367912.363, "dur": 0.554, + "args": { + "External id": 993583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942367918.031, "dur": 62547.413, + "args": { + "External id": 993584,"Record function id": 0, "Sequence number": 10552474, "Fwd thread id": 1, "Ev Idx": 11567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942367919.635, "dur": 62537.011, + "args": { + "External id": 993585,"Sequence number": 10552474, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11568 + } + }, + { + "ph": "f", "id": 445, "pid": 2338706, "tid": 2379422, "ts": 6345942367919.635, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 2338706, "tid": 2379422, + "ts": 6345942367948.722, "dur": 37.880, + "args": { + "External id": 993586,"Record function id": 0, "Ev Idx": 11569 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 2338706, "tid": 2379422, + "ts": 6345942367993.842, "dur": 128.574, + "args": { + "External id": 993587,"Record function id": 0, "Ev Idx": 11570 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 2338706, "tid": 2379422, + "ts": 6345942368131.069, "dur": 62317.704, + "args": { + "External id": 993588,"Record function id": 0, "Ev Idx": 11571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942368224.760, "dur": 7.943, + "args": { + "External id": 993589,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942368243.730, "dur": 4.982, + "args": { + "External id": 993590,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345942368263.323, "dur": 61184.714, + "args": { + "External id": 993591,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345942368277.906, "dur": 61156.732, + "args": { + "External id": 993592,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942368426.203, "dur": 19.286, + "args": { + "External id": 993593,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345942368464.509, "dur": 60924.274, + "args": { + "External id": 993594,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942368467.437, "dur": 60920.310, + "args": { + "External id": 993595,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942368472.450, "dur": 9.687, + "args": { + "External id": 993596,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942368484.225, "dur": 60898.520, + "args": { + "External id": 993597,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942429561.517, "dur": 12.925, + "args": { + "External id": 993598,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942429565.227, "dur": 8.855, + "args": { + "External id": 993599,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345942429607.384, "dur": 390.082, + "args": { + "External id": 993600,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942429635.545, "dur": 357.050, + "args": { + "External id": 993601,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11584, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345942429647.564, "dur": 339.721, + "args": { + "External id": 993602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942430034.764, "dur": 3.746, + "args": { + "External id": 993603,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11586, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942430139.199, "dur": 7.316, + "args": { + "External id": 993604,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942430159.859, "dur": 39.673, + "args": { + "External id": 993605,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942430209.557, "dur": 1.449, + "args": { + "External id": 993606,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942430223.423, "dur": 14.485, + "args": { + "External id": 993607,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942430243.293, "dur": 1.165, + "args": { + "External id": 993608,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942430248.824, "dur": 11.378, + "args": { + "External id": 993609,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942430265.788, "dur": 0.911, + "args": { + "External id": 993610,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942430271.010, "dur": 12.792, + "args": { + "External id": 993611,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942430287.770, "dur": 2.653, + "args": { + "External id": 993612,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942430294.451, "dur": 12.012, + "args": { + "External id": 993613,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942430310.316, "dur": 0.948, + "args": { + "External id": 993614,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942430315.513, "dur": 12.576, + "args": { + "External id": 993615,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942430334.561, "dur": 0.669, + "args": { + "External id": 993616,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942430338.749, "dur": 11.564, + "args": { + "External id": 993617,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942430354.200, "dur": 0.633, + "args": { + "External id": 993618,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942430358.670, "dur": 12.113, + "args": { + "External id": 993619,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942430375.137, "dur": 0.786, + "args": { + "External id": 993620,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942430379.121, "dur": 11.328, + "args": { + "External id": 993621,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942430482.343, "dur": 2227.279, + "args": { + "External id": 993622,"Record function id": 0, "Ev Idx": 11605 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 2338706, "tid": 2379422, + "ts": 6345942430502.892, "dur": 422.196, + "args": { + "External id": 993623,"Record function id": 0, "Ev Idx": 11606 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338706, "tid": 2379422, + "ts": 6345942430518.243, "dur": 315.158, + "args": { + "External id": 993624,"Record function id": 0, "Ev Idx": 11607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942430602.452, "dur": 4.081, + "args": { + "External id": 993625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942430609.499, "dur": 0.970, + "args": { + "External id": 993626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942430612.812, "dur": 0.987, + "args": { + "External id": 993627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942430615.907, "dur": 3.181, + "args": { + "External id": 993628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942430620.886, "dur": 0.856, + "args": { + "External id": 993629,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942430625.119, "dur": 1.005, + "args": { + "External id": 993630,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942430627.718, "dur": 1.122, + "args": { + "External id": 993631,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942430630.279, "dur": 2.246, + "args": { + "External id": 993632,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942430634.074, "dur": 0.886, + "args": { + "External id": 993633,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942430637.682, "dur": 0.970, + "args": { + "External id": 993634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942430657.609, "dur": 146.171, + "args": { + "External id": 993635,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942430674.236, "dur": 125.305, + "args": { + "External id": 993636,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942430689.048, "dur": 15.166, + "args": { + "External id": 993637,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345942430708.182, "dur": 64.087, + "args": { + "External id": 993638,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942430710.874, "dur": 61.085, + "args": { + "External id": 993639,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942430714.828, "dur": 8.243, + "args": { + "External id": 993640,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942430724.799, "dur": 46.491, + "args": { + "External id": 993641,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942430932.169, "dur": 1752.456, + "args": { + "External id": 993642,"Sequence number": 10552473, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11625 + } + }, + { + "ph": "f", "id": 446, "pid": 2338706, "tid": 2379422, "ts": 6345942430932.169, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942431096.828, "dur": 109.173, + "args": { + "External id": 993643,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338706, "tid": 2379422, + "ts": 6345942431250.768, "dur": 41.388, + "args": { + "External id": 993644,"kernel_hash": "cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwopwt4ur7jv5ehxyphohnjewkwckjorwzrlc2qu5z3edn3777m.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338706, "tid": 2379422, + "ts": 6345942431311.402, "dur": 49.001, + "args": { + "External id": 993645,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942431387.473, "dur": 34.196, + "args": { + "External id": 993646,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942431429.794, "dur": 34.756, + "args": { + "External id": 993647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942431472.422, "dur": 28.552, + "args": { + "External id": 993648,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942431510.134, "dur": 30.261, + "args": { + "External id": 993649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338706, "tid": 2379422, + "ts": 6345942431564.287, "dur": 26.802, + "args": { + "External id": 993650,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338706, "tid": 2379422, + "ts": 6345942431608.127, "dur": 29.927, + "args": { + "External id": 993651,"kernel_hash": "cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/q2/cq2rd45bzuewzky5ru3epoa4jgdm2eh637bwzg2y3olumezvi2hx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942431660.351, "dur": 21.504, + "args": { + "External id": 993652,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942431694.917, "dur": 19.174, + "args": { + "External id": 993653,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942431723.849, "dur": 34.911, + "args": { + "External id": 993654,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942431762.495, "dur": 37.382, + "args": { + "External id": 993655,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338706, "tid": 2379422, + "ts": 6345942431829.984, "dur": 305.828, + "args": { + "External id": 993656,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942431908.418, "dur": 6.533, + "args": { + "External id": 993657,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942431917.061, "dur": 3.102, + "args": { + "External id": 993658,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942431921.599, "dur": 2.171, + "args": { + "External id": 993659,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942431924.751, "dur": 4.062, + "args": { + "External id": 993660,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345942431972.889, "dur": 5.022, + "args": { + "External id": 993661,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942431974.823, "dur": 2.908, + "args": { + "External id": 993662,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345942431979.664, "dur": 50.691, + "args": { + "External id": 993663,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942431985.085, "dur": 1.913, + "args": { + "External id": 993664,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2379422, + "ts": 6345942432033.501, "dur": 2.262, + "args": { + "External id": 993665,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942432034.653, "dur": 1.000, + "args": { + "External id": 993666,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2379422, + "ts": 6345942432036.803, "dur": 48.109, + "args": { + "External id": 993667,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942432039.120, "dur": 0.624, + "args": { + "External id": 993668,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2379422, + "ts": 6345942432175.798, "dur": 29.480, + "args": { + "External id": 993669,"kernel_hash": "cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/vl/cvl6m5botwvmbpqmwk6qhxr3434sm2t3rah42nj7xsdba5m7ab2o.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942432222.819, "dur": 16.613, + "args": { + "External id": 993670,"kernel_hash": "cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yo/cyohbdnrtrefnjluve25jrbzkn5nbdd44am66rrsra2mjfxk6odg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942432247.814, "dur": 49.595, + "args": { + "External id": 993671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942432304.425, "dur": 39.249, + "args": { + "External id": 993672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942432354.565, "dur": 21.053, + "args": { + "External id": 993673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942432382.279, "dur": 31.438, + "args": { + "External id": 993674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942432420.920, "dur": 27.573, + "args": { + "External id": 993675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2379422, + "ts": 6345942432456.000, "dur": 31.222, + "args": { + "External id": 993676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338706, "tid": 2379422, + "ts": 6345942432504.769, "dur": 24.364, + "args": { + "External id": 993677,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942432543.987, "dur": 23.237, + "args": { + "External id": 993678,"kernel_hash": "cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/df/cdfatgohdja6cpjscd5ulk47n5ozsecw6vgjqwodg5euk5mmpoix.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942432580.829, "dur": 16.618, + "args": { + "External id": 993679,"kernel_hash": "ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/tw/ctwj4phgp7e4c7awvtwtgq24cmf75exkour63vmsfhv5kzfdrrt3.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338706, "tid": 2379422, + "ts": 6345942432611.037, "dur": 14.147, + "args": { + "External id": 993680,"kernel_hash": "c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/47/c47dbscmmtr2kvomvxfi5h6hqbl4caacaijotzpx47lgoa5vxsmg.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338706, "tid": 2379422, + "ts": 6345942432637.404, "dur": 15.996, + "args": { + "External id": 993681,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432732.418, "dur": 15.635, + "args": { + "External id": 993682,"Record function id": 0, "Ev Idx": 11665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432736.330, "dur": 10.907, + "args": { + "External id": 993683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432740.654, "dur": 5.638, + "args": { + "External id": 993684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432742.273, "dur": 3.894, + "args": { + "External id": 993685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432751.853, "dur": 5.943, + "args": { + "External id": 993686,"Record function id": 0, "Ev Idx": 11669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432753.587, "dur": 3.783, + "args": { + "External id": 993687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432754.679, "dur": 2.040, + "args": { + "External id": 993688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432755.470, "dur": 1.156, + "args": { + "External id": 993689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432761.056, "dur": 4.756, + "args": { + "External id": 993690,"Record function id": 0, "Ev Idx": 11673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432762.682, "dur": 2.697, + "args": { + "External id": 993691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432763.240, "dur": 1.679, + "args": { + "External id": 993692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432763.885, "dur": 0.931, + "args": { + "External id": 993693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432768.963, "dur": 4.613, + "args": { + "External id": 993694,"Record function id": 0, "Ev Idx": 11677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432770.420, "dur": 2.703, + "args": { + "External id": 993695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432771.327, "dur": 1.361, + "args": { + "External id": 993696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432771.873, "dur": 0.741, + "args": { + "External id": 993697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432776.585, "dur": 4.799, + "args": { + "External id": 993698,"Record function id": 0, "Ev Idx": 11681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432778.367, "dur": 2.596, + "args": { + "External id": 993699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432779.047, "dur": 1.275, + "args": { + "External id": 993700,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432779.556, "dur": 0.701, + "args": { + "External id": 993701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432784.466, "dur": 4.658, + "args": { + "External id": 993702,"Record function id": 0, "Ev Idx": 11685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432786.012, "dur": 2.698, + "args": { + "External id": 993703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432786.941, "dur": 1.211, + "args": { + "External id": 993704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432787.405, "dur": 0.667, + "args": { + "External id": 993705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432792.272, "dur": 6.553, + "args": { + "External id": 993706,"Record function id": 0, "Ev Idx": 11689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432794.025, "dur": 4.387, + "args": { + "External id": 993707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432794.493, "dur": 3.507, + "args": { + "External id": 993708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432794.761, "dur": 3.152, + "args": { + "External id": 993709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432801.883, "dur": 4.659, + "args": { + "External id": 993710,"Record function id": 0, "Ev Idx": 11693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432803.702, "dur": 2.372, + "args": { + "External id": 993711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432804.218, "dur": 1.419, + "args": { + "External id": 993712,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432804.697, "dur": 0.830, + "args": { + "External id": 993713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432810.041, "dur": 4.022, + "args": { + "External id": 993714,"Record function id": 0, "Ev Idx": 11697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942432811.087, "dur": 2.535, + "args": { + "External id": 993715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432811.882, "dur": 1.221, + "args": { + "External id": 993716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942432812.276, "dur": 0.760, + "args": { + "External id": 993717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942432818.480, "dur": 62014.449, + "args": { + "External id": 993718,"Record function id": 0, "Sequence number": 10552472, "Fwd thread id": 1, "Ev Idx": 11701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942432820.139, "dur": 62003.873, + "args": { + "External id": 993719,"Sequence number": 10552472, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11702 + } + }, + { + "ph": "f", "id": 447, "pid": 2338706, "tid": 2379422, "ts": 6345942432820.139, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 2338706, "tid": 2379422, + "ts": 6345942432849.440, "dur": 39.145, + "args": { + "External id": 993720,"Record function id": 0, "Ev Idx": 11703 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 2338706, "tid": 2379422, + "ts": 6345942432896.736, "dur": 70.886, + "args": { + "External id": 993721,"Record function id": 0, "Ev Idx": 11704 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 2338706, "tid": 2379422, + "ts": 6345942432973.201, "dur": 61842.802, + "args": { + "External id": 993722,"Record function id": 0, "Ev Idx": 11705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942433137.961, "dur": 12.579, + "args": { + "External id": 993723,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942433163.843, "dur": 5.818, + "args": { + "External id": 993724,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345942433186.229, "dur": 60665.144, + "args": { + "External id": 993725,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345942433200.821, "dur": 60637.358, + "args": { + "External id": 993726,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942433313.569, "dur": 18.473, + "args": { + "External id": 993727,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345942433354.322, "dur": 60441.745, + "args": { + "External id": 993728,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942433357.468, "dur": 60437.525, + "args": { + "External id": 993729,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942433362.605, "dur": 8.151, + "args": { + "External id": 993730,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942433372.888, "dur": 60417.059, + "args": { + "External id": 993731,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942493964.395, "dur": 11.790, + "args": { + "External id": 993732,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942493968.005, "dur": 7.782, + "args": { + "External id": 993733,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345942494023.227, "dur": 394.271, + "args": { + "External id": 993734,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942494080.029, "dur": 331.960, + "args": { + "External id": 993735,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11718, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345942494094.627, "dur": 311.508, + "args": { + "External id": 993736,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942494440.216, "dur": 2.230, + "args": { + "External id": 993737,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11720, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942494506.830, "dur": 7.078, + "args": { + "External id": 993738,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942494526.918, "dur": 35.801, + "args": { + "External id": 993739,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942494573.643, "dur": 1.602, + "args": { + "External id": 993740,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942494581.632, "dur": 13.974, + "args": { + "External id": 993741,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942494601.052, "dur": 1.109, + "args": { + "External id": 993742,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942494607.369, "dur": 12.452, + "args": { + "External id": 993743,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942494625.190, "dur": 0.973, + "args": { + "External id": 993744,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942494629.969, "dur": 14.278, + "args": { + "External id": 993745,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942494649.010, "dur": 2.780, + "args": { + "External id": 993746,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942494655.425, "dur": 11.864, + "args": { + "External id": 993747,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942494672.551, "dur": 0.923, + "args": { + "External id": 993748,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942494676.985, "dur": 12.603, + "args": { + "External id": 993749,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942494695.758, "dur": 1.191, + "args": { + "External id": 993750,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942494700.783, "dur": 10.771, + "args": { + "External id": 993751,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942494715.816, "dur": 1.070, + "args": { + "External id": 993752,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942494720.595, "dur": 12.328, + "args": { + "External id": 993753,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942494737.892, "dur": 0.852, + "args": { + "External id": 993754,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942494742.381, "dur": 11.002, + "args": { + "External id": 993755,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942494847.630, "dur": 343.635, + "args": { + "External id": 993756,"Record function id": 0, "Sequence number": 10552471, "Fwd thread id": 1, "Ev Idx": 11739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338706, "tid": 2379422, + "ts": 6345942494850.808, "dur": 330.886, + "args": { + "External id": 993757,"Sequence number": 10552471, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11740 + } + }, + { + "ph": "f", "id": 448, "pid": 2338706, "tid": 2379422, "ts": 6345942494850.808, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 2338706, "tid": 2379422, + "ts": 6345942494978.328, "dur": 57.320, + "args": { + "External id": 993758,"kernel_hash": "cwi477k6wdh6lokdagawsji5sufpvs7jjfv77xwkpv6wyl2co2tv", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/wi/cwi477k6wdh6lokdagawsji5sufpvs7jjfv77xwkpv6wyl2co2tv.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 11741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 2338706, "tid": 2379422, + "ts": 6345942495086.015, "dur": 31.832, + "args": { + "External id": 993759,"kernel_hash": "cdaibgaofwvuhdtnzghakbdinq55v26czvh7dtqt726q3xahddo3", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/da/cdaibgaofwvuhdtnzghakbdinq55v26czvh7dtqt726q3xahddo3.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096, 4096], [32000, 4096], []], "Ev Idx": 11742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 2338706, "tid": 2379422, + "ts": 6345942495142.143, "dur": 22.562, + "args": { + "External id": 993760,"kernel_hash": "cozpxeylo6qxtrc76ucyd7ba7bcr4ctg436exqn3bklybds2girm", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/oz/cozpxeylo6qxtrc76ucyd7ba7bcr4ctg436exqn3bklybds2girm.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 11743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942495203.950, "dur": 18.173, + "args": { + "External id": 993761,"Record function id": 0, "Ev Idx": 11744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338706, "tid": 2379422, + "ts": 6345942495207.271, "dur": 13.800, + "args": { + "External id": 993762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 11745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942495211.432, "dur": 8.747, + "args": { + "External id": 993763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 11746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2379422, + "ts": 6345942495213.435, "dur": 6.621, + "args": { + "External id": 993764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 11747 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 2338706, "tid": 2379422, + "ts": 6345942495245.125, "dur": 17417.428, + "args": { + "External id": 993765,"Record function id": 0, "Ev Idx": 11748 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 2338706, "tid": 2379422, + "ts": 6345942495265.397, "dur": 37.264, + "args": { + "External id": 993766,"Record function id": 0, "Ev Idx": 11749 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 2338706, "tid": 2379422, + "ts": 6345942495309.351, "dur": 276.201, + "args": { + "External id": 993767,"Record function id": 0, "Ev Idx": 11750 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 2338706, "tid": 2379422, + "ts": 6345942495591.693, "dur": 16819.929, + "args": { + "External id": 993768,"Record function id": 0, "Ev Idx": 11751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942495712.364, "dur": 7.465, + "args": { + "External id": 993769,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2379422, + "ts": 6345942495729.833, "dur": 5.326, + "args": { + "External id": 993770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 11753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345942495756.881, "dur": 14896.300, + "args": { + "External id": 993771,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 11754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338706, "tid": 2379422, + "ts": 6345942495800.219, "dur": 14837.260, + "args": { + "External id": 993772,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 11755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942496478.441, "dur": 25.007, + "args": { + "External id": 993773,"Record function id": 0, "Concrete Inputs": ["[277237]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2379422, + "ts": 6345942496724.862, "dur": 13859.749, + "args": { + "External id": 993774,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], [], []], "Ev Idx": 11757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2379422, + "ts": 6345942496729.445, "dur": 13853.994, + "args": { + "External id": 993775,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], []], "Ev Idx": 11758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942496735.825, "dur": 15.411, + "args": { + "External id": 993776,"Record function id": 0, "Concrete Inputs": ["[277237]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2379422, + "ts": 6345942496754.142, "dur": 13822.818, + "args": { + "External id": 993777,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[277237], [277237], []], "Ev Idx": 11760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942510803.782, "dur": 12.621, + "args": { + "External id": 993778,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1134596096], [], [], [], [], []], "Ev Idx": 11761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2379422, + "ts": 6345942510808.258, "dur": 7.708, + "args": { + "External id": 993779,"Record function id": 0, "Concrete Inputs": ["[141824512]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338706, "tid": 2379422, + "ts": 6345942510847.516, "dur": 426.548, + "args": { + "External id": 993780,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[141824512], [1134596096], [], [], [], []], "Ev Idx": 11763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942510873.259, "dur": 394.774, + "args": { + "External id": 993781,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 141824512, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1134596096], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11764, "In msg nelems": 1134596096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338706, "tid": 2379422, + "ts": 6345942510884.512, "dur": 376.632, + "args": { + "External id": 993782,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1134596096]], "Ev Idx": 11765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2379422, + "ts": 6345942511294.370, "dur": 2.564, + "args": { + "External id": 993783,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11766, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511358.937, "dur": 7.277, + "args": { + "External id": 993784,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511379.318, "dur": 32.607, + "args": { + "External id": 993785,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4000, 4096], [4000, 4096], []], "Ev Idx": 11768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511422.110, "dur": 1.683, + "args": { + "External id": 993786,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511430.120, "dur": 14.704, + "args": { + "External id": 993787,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511450.669, "dur": 1.196, + "args": { + "External id": 993788,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "16384512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511456.356, "dur": 11.316, + "args": { + "External id": 993789,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511473.115, "dur": 1.286, + "args": { + "External id": 993790,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "18481664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511479.600, "dur": 12.812, + "args": { + "External id": 993791,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511497.154, "dur": 3.286, + "args": { + "External id": 993792,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "19005952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511556.554, "dur": 13.439, + "args": { + "External id": 993793,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511576.037, "dur": 1.065, + "args": { + "External id": 993794,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "19530240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511581.028, "dur": 12.004, + "args": { + "External id": 993795,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511597.221, "dur": 1.361, + "args": { + "External id": 993796,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511602.917, "dur": 12.031, + "args": { + "External id": 993797,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511618.987, "dur": 0.694, + "args": { + "External id": 993798,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "21627904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511623.431, "dur": 12.583, + "args": { + "External id": 993799,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511641.804, "dur": 0.728, + "args": { + "External id": 993800,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "28967936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511646.528, "dur": 11.853, + "args": { + "External id": 993801,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511662.153, "dur": 0.686, + "args": { + "External id": 993802,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "36307968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511666.735, "dur": 12.293, + "args": { + "External id": 993803,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511682.945, "dur": 0.798, + "args": { + "External id": 993804,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511687.325, "dur": 10.914, + "args": { + "External id": 993805,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511702.498, "dur": 0.870, + "args": { + "External id": 993806,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "43648512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511707.036, "dur": 12.381, + "args": { + "External id": 993807,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511723.723, "dur": 2.712, + "args": { + "External id": 993808,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "45745664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511730.548, "dur": 10.902, + "args": { + "External id": 993809,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511747.512, "dur": 0.946, + "args": { + "External id": 993810,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "46269952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511752.558, "dur": 11.100, + "args": { + "External id": 993811,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511768.290, "dur": 0.905, + "args": { + "External id": 993812,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "46794240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511775.275, "dur": 10.201, + "args": { + "External id": 993813,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511790.216, "dur": 0.883, + "args": { + "External id": 993814,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511795.270, "dur": 10.154, + "args": { + "External id": 993815,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511809.717, "dur": 1.079, + "args": { + "External id": 993816,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "48891904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511814.948, "dur": 10.109, + "args": { + "External id": 993817,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511830.017, "dur": 1.133, + "args": { + "External id": 993818,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "56231936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511835.361, "dur": 10.322, + "args": { + "External id": 993819,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511849.740, "dur": 0.940, + "args": { + "External id": 993820,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "63571968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511854.648, "dur": 10.751, + "args": { + "External id": 993821,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511869.479, "dur": 0.813, + "args": { + "External id": 993822,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511874.148, "dur": 12.180, + "args": { + "External id": 993823,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511890.910, "dur": 2.792, + "args": { + "External id": 993824,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "70912512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511897.363, "dur": 10.565, + "args": { + "External id": 993825,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511912.059, "dur": 0.821, + "args": { + "External id": 993826,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73009664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511917.025, "dur": 10.196, + "args": { + "External id": 993827,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511931.621, "dur": 0.728, + "args": { + "External id": 993828,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73533952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511936.410, "dur": 9.378, + "args": { + "External id": 993829,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511951.219, "dur": 0.980, + "args": { + "External id": 993830,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "74058240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511956.592, "dur": 10.332, + "args": { + "External id": 993831,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511971.535, "dur": 0.720, + "args": { + "External id": 993832,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511976.221, "dur": 10.005, + "args": { + "External id": 993833,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942511990.609, "dur": 0.934, + "args": { + "External id": 993834,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "76155904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942511995.914, "dur": 10.414, + "args": { + "External id": 993835,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942512026.838, "dur": 1.473, + "args": { + "External id": 993836,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "83495936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942512033.738, "dur": 12.997, + "args": { + "External id": 993837,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942512052.034, "dur": 39.409, + "args": { + "External id": 993838,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "90835968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942512098.842, "dur": 14.163, + "args": { + "External id": 993839,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942512119.632, "dur": 3.033, + "args": { + "External id": 993840,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942512126.316, "dur": 10.232, + "args": { + "External id": 993841,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942512142.039, "dur": 0.842, + "args": { + "External id": 993842,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "98176512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942512147.389, "dur": 10.245, + "args": { + "External id": 993843,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942512162.385, "dur": 0.978, + "args": { + "External id": 993844,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100273664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942512167.200, "dur": 8.989, + "args": { + "External id": 993845,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942512181.293, "dur": 0.835, + "args": { + "External id": 993846,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100797952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942512185.963, "dur": 10.298, + "args": { + "External id": 993847,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942512200.856, "dur": 0.838, + "args": { + "External id": 993848,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "101322240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942512205.936, "dur": 9.634, + "args": { + "External id": 993849,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942512220.872, "dur": 0.894, + "args": { + "External id": 993850,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942512226.357, "dur": 11.258, + "args": { + "External id": 993851,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942512243.216, "dur": 0.793, + "args": { + "External id": 993852,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "103419904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942512248.221, "dur": 11.784, + "args": { + "External id": 993853,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942512264.245, "dur": 0.876, + "args": { + "External id": 993854,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "110759936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942512269.836, "dur": 12.539, + "args": { + "External id": 993855,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942512286.836, "dur": 2.754, + "args": { + "External id": 993856,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "118099968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942512293.832, "dur": 10.797, + "args": { + "External id": 993857,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942512309.407, "dur": 0.876, + "args": { + "External id": 993858,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942512314.754, "dur": 11.514, + "args": { + "External id": 993859,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2379422, + "ts": 6345942512330.123, "dur": 0.963, + "args": { + "External id": 993860,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "125440512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2379422, + "ts": 6345942512335.950, "dur": 12.002, + "args": { + "External id": 993861,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4000, 4096], [4000, 4096], []], "Ev Idx": 11844 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "ProfilerStep#22527", "pid": 2338706, "tid": 2338706, + "ts": 6345936068847.077, "dur": 6474155.179, + "args": { + "External id": 972801,"Record function id": 0, "Ev Idx": 11845 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.zero_grad#AdamW.zero_grad", "pid": 2338706, "tid": 2338706, + "ts": 6345936068884.349, "dur": 979.006, + "args": { + "External id": 972802,"Record function id": 0, "Ev Idx": 11846 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 2338706, "tid": 2338706, + "ts": 6345936069929.158, "dur": 262.038, + "args": { + "External id": 972803,"Record function id": 0, "Ev Idx": 11847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936070608.644, "dur": 21.414, + "args": { + "External id": 972804,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 11848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936070622.218, "dur": 3.139, + "args": { + "External id": 972805,"Record function id": 0, "Concrete Inputs": ["", "[8, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 11849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936070631.992, "dur": 5.259, + "args": { + "External id": 972806,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 11850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936070634.954, "dur": 1.157, + "args": { + "External id": 972807,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 11851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936070667.853, "dur": 4371.669, + "args": { + "External id": 972808,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], [], []], "Ev Idx": 11852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936070676.968, "dur": 4362.007, + "args": { + "External id": 972809,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 11853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936070688.004, "dur": 11.144, + "args": { + "External id": 972810,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936070701.639, "dur": 4335.964, + "args": { + "External id": 972811,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936070712.629, "dur": 0.458, + "args": { + "External id": 972812,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 11856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936070716.691, "dur": 12.533, + "args": { + "External id": 972813,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[8, 4096], [8, 4096]], "Ev Idx": 11857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338706, "tid": 2338706, + "ts": 6345936070723.292, "dur": 5.744, + "args": { + "External id": 972814,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[8, 4096], [], []], "Ev Idx": 11858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936070725.753, "dur": 2.806, + "args": { + "External id": 972815,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 11859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345936070731.863, "dur": 146.960, + "args": { + "External id": 972816,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 11860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345936070734.647, "dur": 143.806, + "args": { + "External id": 972817,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 11861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936070737.547, "dur": 11.324, + "args": { + "External id": 972818,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 11862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936070743.060, "dur": 5.273, + "args": { + "External id": 972819,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936070749.888, "dur": 128.199, + "args": { + "External id": 972820,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936070881.938, "dur": 4151.710, + "args": { + "External id": 972821,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936075087.577, "dur": 372.838, + "args": { + "External id": 972822,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 11866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936075090.848, "dur": 369.014, + "args": { + "External id": 972823,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], []], "Ev Idx": 11867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936075096.140, "dur": 10.818, + "args": { + "External id": 972824,"Record function id": 0, "Concrete Inputs": ["[8, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936075109.581, "dur": 348.754, + "args": { + "External id": 972825,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[8, 8192], [8, 8192], []], "Ev Idx": 11869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338706, "tid": 2338706, + "ts": 6345936075489.946, "dur": 58.140, + "args": { + "External id": 972826,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936075495.111, "dur": 4.310, + "args": { + "External id": 972827,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338706, "tid": 2338706, + "ts": 6345936075501.986, "dur": 45.509, + "args": { + "External id": 972828,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 11872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345936075509.570, "dur": 7.986, + "args": { + "External id": 972829,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 11873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 2338706, "tid": 2338706, + "ts": 6345936075559.964, "dur": 83.519, + "args": { + "External id": 972830,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338706, "tid": 2338706, + "ts": 6345936075566.127, "dur": 8.627, + "args": { + "External id": 972831,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 11875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936075572.508, "dur": 1.895, + "args": { + "External id": 972832,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 11876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936075579.075, "dur": 4.000, + "args": { + "External id": 972833,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2338706, + "ts": 6345936075585.484, "dur": 4.637, + "args": { + "External id": 972834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[8, 4096]], "Ev Idx": 11878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338706, "tid": 2338706, + "ts": 6345936075593.207, "dur": 9.283, + "args": { + "External id": 972835,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 11879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936075601.532, "dur": 0.752, + "args": { + "External id": 972836,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 11880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338706, "tid": 2338706, + "ts": 6345936075604.451, "dur": 2.706, + "args": { + "External id": 972837,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 11881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936075606.127, "dur": 0.913, + "args": { + "External id": 972838,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 11882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936075609.571, "dur": 4.257, + "args": { + "External id": 972839,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [8, 1, 1, 4096]], "Ev Idx": 11883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338706, "tid": 2338706, + "ts": 6345936075611.074, "dur": 2.601, + "args": { + "External id": 972840,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 11884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936075612.775, "dur": 0.775, + "args": { + "External id": 972841,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 11885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936075614.680, "dur": 28.055, + "args": { + "External id": 972842,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[8, 1, 1, 4096], [8, 1, 1, 4096], []], "Ev Idx": 11886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936075652.410, "dur": 39.292, + "args": { + "External id": 972843,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 11887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936075657.018, "dur": 34.459, + "args": { + "External id": 972844,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 11888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936075663.588, "dur": 4.659, + "args": { + "External id": 972845,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936075669.391, "dur": 21.411, + "args": { + "External id": 972846,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11890 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936075836.668, "dur": 205.928, + "args": { + "External id": 972847,"Record function id": 0, "Ev Idx": 11891 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 2338706, "tid": 2338706, + "ts": 6345936075941.032, "dur": 65.727, + "args": { + "External id": 972848,"Record function id": 0, "Ev Idx": 11892 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936076051.930, "dur": 98.394, + "args": { + "External id": 972849,"Record function id": 0, "Ev Idx": 11893 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936076166.704, "dur": 12871.818, + "args": { + "External id": 972850,"Record function id": 0, "Ev Idx": 11894 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 2338706, "tid": 2338706, + "ts": 6345936076176.553, "dur": 1617.752, + "args": { + "External id": 972851,"Record function id": 0, "Ev Idx": 11895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936076322.131, "dur": 13.116, + "args": { + "External id": 972852,"Record function id": 0, "Concrete Inputs": ["[141824512]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936076358.464, "dur": 151.708, + "args": { + "External id": 972853,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 11897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076364.659, "dur": 2.070, + "args": { + "External id": 972854,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076373.209, "dur": 0.488, + "args": { + "External id": 972855,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076374.681, "dur": 0.635, + "args": { + "External id": 972856,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "16384512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076376.120, "dur": 3.470, + "args": { + "External id": 972857,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "18481664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076384.581, "dur": 0.622, + "args": { + "External id": 972858,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19005952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076386.327, "dur": 0.580, + "args": { + "External id": 972859,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "19530240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076389.831, "dur": 2.336, + "args": { + "External id": 972860,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076393.201, "dur": 0.580, + "args": { + "External id": 972861,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "21627904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076394.354, "dur": 0.540, + "args": { + "External id": 972862,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "28967936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076400.078, "dur": 0.364, + "args": { + "External id": 972863,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "36307968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076401.093, "dur": 0.548, + "args": { + "External id": 972864,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076402.506, "dur": 2.925, + "args": { + "External id": 972865,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "43648512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076410.399, "dur": 0.489, + "args": { + "External id": 972866,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45745664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076411.649, "dur": 0.297, + "args": { + "External id": 972867,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46269952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076415.305, "dur": 2.303, + "args": { + "External id": 972868,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "46794240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076418.235, "dur": 0.548, + "args": { + "External id": 972869,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076419.574, "dur": 0.404, + "args": { + "External id": 972870,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "48891904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076425.644, "dur": 0.499, + "args": { + "External id": 972871,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "56231936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076426.910, "dur": 0.401, + "args": { + "External id": 972872,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "63571968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076428.058, "dur": 3.207, + "args": { + "External id": 972873,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076436.035, "dur": 0.476, + "args": { + "External id": 972874,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "70912512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076437.640, "dur": 0.406, + "args": { + "External id": 972875,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73009664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076440.730, "dur": 2.051, + "args": { + "External id": 972876,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73533952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076443.820, "dur": 0.361, + "args": { + "External id": 972877,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "74058240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076444.928, "dur": 0.269, + "args": { + "External id": 972878,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076450.359, "dur": 0.298, + "args": { + "External id": 972879,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "76155904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076451.398, "dur": 0.388, + "args": { + "External id": 972880,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "83495936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076452.757, "dur": 2.866, + "args": { + "External id": 972881,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "90835968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076461.704, "dur": 0.322, + "args": { + "External id": 972882,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076462.967, "dur": 0.417, + "args": { + "External id": 972883,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "98176512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076466.644, "dur": 2.093, + "args": { + "External id": 972884,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100273664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076469.561, "dur": 0.327, + "args": { + "External id": 972885,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100797952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076470.564, "dur": 0.436, + "args": { + "External id": 972886,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "101322240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076475.514, "dur": 0.263, + "args": { + "External id": 972887,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076476.336, "dur": 0.511, + "args": { + "External id": 972888,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "103419904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076477.590, "dur": 2.481, + "args": { + "External id": 972889,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "110759936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076484.538, "dur": 0.284, + "args": { + "External id": 972890,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "118099968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076485.964, "dur": 0.354, + "args": { + "External id": 972891,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076489.234, "dur": 2.007, + "args": { + "External id": 972892,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "125440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936076540.439, "dur": 168.916, + "args": { + "External id": 972893,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936076796.889, "dur": 451.686, + "args": { + "External id": 972894,"Record function id": 0, "Concrete Inputs": ["", "", "141824512", "8", "0", "15", ""], "Input type": ["TensorList", "", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 11938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936076815.899, "dur": 5.388, + "args": { + "External id": 972895,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936076832.902, "dur": 17.044, + "args": { + "External id": 972896,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "141824512"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 11940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936076839.115, "dur": 10.300, + "args": { + "External id": 972897,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "141824512", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[1134596096], [], [], [], []], "Ev Idx": 11941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076845.548, "dur": 0.791, + "args": { + "External id": 972898,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 11942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936076860.168, "dur": 132.944, + "args": { + "External id": 972899,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 11943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076862.247, "dur": 0.719, + "args": { + "External id": 972900,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076866.093, "dur": 0.663, + "args": { + "External id": 972901,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076867.572, "dur": 2.940, + "args": { + "External id": 972902,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "16384512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076871.252, "dur": 1.048, + "args": { + "External id": 972903,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "18481664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076876.096, "dur": 0.336, + "args": { + "External id": 972904,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19005952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076877.365, "dur": 0.487, + "args": { + "External id": 972905,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "19530240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076878.923, "dur": 0.289, + "args": { + "External id": 972906,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076882.483, "dur": 0.362, + "args": { + "External id": 972907,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "21627904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076883.646, "dur": 1.002, + "args": { + "External id": 972908,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "28967936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076888.102, "dur": 0.540, + "args": { + "External id": 972909,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "36307968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076889.595, "dur": 2.519, + "args": { + "External id": 972910,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076893.146, "dur": 0.500, + "args": { + "External id": 972911,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "43648512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076897.264, "dur": 2.395, + "args": { + "External id": 972912,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45745664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076900.664, "dur": 0.341, + "args": { + "External id": 972913,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46269952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076901.915, "dur": 0.406, + "args": { + "External id": 972914,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "46794240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076908.769, "dur": 0.500, + "args": { + "External id": 972915,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076909.866, "dur": 0.543, + "args": { + "External id": 972916,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "48891904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076913.532, "dur": 0.546, + "args": { + "External id": 972917,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "56231936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076916.090, "dur": 2.597, + "args": { + "External id": 972918,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "63571968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076919.455, "dur": 0.516, + "args": { + "External id": 972919,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076923.037, "dur": 1.972, + "args": { + "External id": 972920,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "70912512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076925.837, "dur": 0.282, + "args": { + "External id": 972921,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73009664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076926.897, "dur": 0.410, + "args": { + "External id": 972922,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73533952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076932.312, "dur": 0.394, + "args": { + "External id": 972923,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "74058240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076933.430, "dur": 0.407, + "args": { + "External id": 972924,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076936.871, "dur": 0.620, + "args": { + "External id": 972925,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "76155904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076940.152, "dur": 2.654, + "args": { + "External id": 972926,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "83495936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076943.846, "dur": 0.545, + "args": { + "External id": 972927,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "90835968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076947.792, "dur": 2.318, + "args": { + "External id": 972928,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076950.961, "dur": 0.346, + "args": { + "External id": 972929,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "98176512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076952.120, "dur": 0.326, + "args": { + "External id": 972930,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100273664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076957.388, "dur": 0.405, + "args": { + "External id": 972931,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100797952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076958.551, "dur": 0.349, + "args": { + "External id": 972932,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "101322240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076962.313, "dur": 0.485, + "args": { + "External id": 972933,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076964.963, "dur": 2.850, + "args": { + "External id": 972934,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "103419904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076968.576, "dur": 0.436, + "args": { + "External id": 972935,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "110759936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076972.127, "dur": 1.804, + "args": { + "External id": 972936,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "118099968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076974.778, "dur": 0.382, + "args": { + "External id": 972937,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936076975.904, "dur": 0.327, + "args": { + "External id": 972938,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "125440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936077046.179, "dur": 177.840, + "args": { + "External id": 972939,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936077326.090, "dur": 345.137, + "args": { + "External id": 972940,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[1134596096], [141824512], [], [], []], "Ev Idx": 11984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936077368.476, "dur": 297.133, + "args": { + "External id": 972941,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1134596096, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[141824512], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11985, "In msg nelems": 141824512 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936077380.101, "dur": 279.434, + "args": { + "External id": 972942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[141824512]], "Ev Idx": 11986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936077698.374, "dur": 3.121, + "args": { + "External id": 972943,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11987, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 2338706, "tid": 2338706, + "ts": 6345936077813.035, "dur": 10947.582, + "args": { + "External id": 972944,"Record function id": 0, "Ev Idx": 11988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078137.855, "dur": 8.239, + "args": { + "External id": 972945,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 11989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078151.216, "dur": 1.354, + "args": { + "External id": 972946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 11990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078154.832, "dur": 1.344, + "args": { + "External id": 972947,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078160.894, "dur": 3.689, + "args": { + "External id": 972948,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078166.367, "dur": 1.217, + "args": { + "External id": 972949,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078169.112, "dur": 1.187, + "args": { + "External id": 972950,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078172.208, "dur": 1.078, + "args": { + "External id": 972951,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078179.231, "dur": 2.541, + "args": { + "External id": 972952,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078183.461, "dur": 1.116, + "args": { + "External id": 972953,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078186.027, "dur": 1.424, + "args": { + "External id": 972954,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078189.008, "dur": 0.836, + "args": { + "External id": 972955,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078193.504, "dur": 2.628, + "args": { + "External id": 972956,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078197.727, "dur": 0.878, + "args": { + "External id": 972957,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078200.370, "dur": 0.912, + "args": { + "External id": 972958,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078202.763, "dur": 0.989, + "args": { + "External id": 972959,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078210.227, "dur": 2.027, + "args": { + "External id": 972960,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078214.009, "dur": 0.843, + "args": { + "External id": 972961,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078216.460, "dur": 0.955, + "args": { + "External id": 972962,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078219.052, "dur": 1.160, + "args": { + "External id": 972963,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078224.467, "dur": 3.303, + "args": { + "External id": 972964,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078229.593, "dur": 0.897, + "args": { + "External id": 972965,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078231.982, "dur": 1.000, + "args": { + "External id": 972966,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078234.363, "dur": 1.050, + "args": { + "External id": 972967,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078241.526, "dur": 2.198, + "args": { + "External id": 972968,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078245.264, "dur": 0.870, + "args": { + "External id": 972969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078247.658, "dur": 1.090, + "args": { + "External id": 972970,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078250.467, "dur": 0.727, + "args": { + "External id": 972971,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078255.203, "dur": 3.302, + "args": { + "External id": 972972,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078260.286, "dur": 0.941, + "args": { + "External id": 972973,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078262.817, "dur": 0.945, + "args": { + "External id": 972974,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078265.160, "dur": 1.037, + "args": { + "External id": 972975,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078272.949, "dur": 2.893, + "args": { + "External id": 972976,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078277.479, "dur": 1.069, + "args": { + "External id": 972977,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078280.155, "dur": 1.102, + "args": { + "External id": 972978,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078282.781, "dur": 0.969, + "args": { + "External id": 972979,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078287.702, "dur": 2.971, + "args": { + "External id": 972980,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078292.180, "dur": 0.976, + "args": { + "External id": 972981,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078314.577, "dur": 1.145, + "args": { + "External id": 972982,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078317.657, "dur": 1.952, + "args": { + "External id": 972983,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936078325.737, "dur": 2.094, + "args": { + "External id": 972984,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 12028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936078360.739, "dur": 10330.362, + "args": { + "External id": 972985,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 12029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936078390.138, "dur": 10288.215, + "args": { + "External id": 972986,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 12030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936078416.323, "dur": 7.523, + "args": { + "External id": 972987,"Record function id": 0, "Concrete Inputs": ["[4384]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936078430.156, "dur": 10194.056, + "args": { + "External id": 972988,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], [], []], "Ev Idx": 12032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936078433.183, "dur": 10190.189, + "args": { + "External id": 972989,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], []], "Ev Idx": 12033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936078441.007, "dur": 6.398, + "args": { + "External id": 972990,"Record function id": 0, "Concrete Inputs": ["[4384]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936078449.562, "dur": 10168.736, + "args": { + "External id": 972991,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4384], [4384], []], "Ev Idx": 12035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936089200.274, "dur": 45.578, + "args": { + "External id": 972992,"Record function id": 0, "Ev Idx": 12036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 2338706, "tid": 2338706, + "ts": 6345936089248.875, "dur": 273.359, + "args": { + "External id": 972993,"Record function id": 0, "Ev Idx": 12037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936089299.287, "dur": 212.273, + "args": { + "External id": 972994,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32000, 4096], [8, 4096]], "Ev Idx": 12038 + } + }, + { + "ph": "s", "id": 224, "pid": 2338706, "tid": 2338706, "ts": 6345936089299.287, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936089398.434, "dur": 66.378, + "args": { + "External id": 972995,"kernel_hash": "cwt66mnkcya5wyvezkdac4cj5ntiugvh6bs2jikth6qdzhubcefg", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wt/cwt66mnkcya5wyvezkdac4cj5ntiugvh6bs2jikth6qdzhubcefg.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096], [32000, 4096], [8, 4096, 4096], []], "Ev Idx": 12039 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936089596.620, "dur": 64.721, + "args": { + "External id": 972996,"Record function id": 0, "Ev Idx": 12040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 2338706, "tid": 2338706, + "ts": 6345936089674.589, "dur": 9317.944, + "args": { + "External id": 972997,"Record function id": 0, "Ev Idx": 12041 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338706, "tid": 2338706, + "ts": 6345936089686.152, "dur": 1098.360, + "args": { + "External id": 972998,"Record function id": 0, "Ev Idx": 12042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936089773.872, "dur": 14.743, + "args": { + "External id": 972999,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936089804.507, "dur": 48.752, + "args": { + "External id": 973000,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936089815.302, "dur": 2.565, + "args": { + "External id": 973001,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936089820.669, "dur": 0.584, + "args": { + "External id": 973002,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936089824.886, "dur": 2.093, + "args": { + "External id": 973003,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936089827.978, "dur": 0.574, + "args": { + "External id": 973004,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936089829.480, "dur": 0.695, + "args": { + "External id": 973005,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936089834.852, "dur": 0.256, + "args": { + "External id": 973006,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936089835.821, "dur": 0.640, + "args": { + "External id": 973007,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936089837.594, "dur": 2.818, + "args": { + "External id": 973008,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936089845.496, "dur": 0.363, + "args": { + "External id": 973009,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936089865.653, "dur": 61.039, + "args": { + "External id": 973010,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936089966.661, "dur": 252.436, + "args": { + "External id": 973011,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936089982.921, "dur": 4.569, + "args": { + "External id": 973012,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936089993.744, "dur": 11.075, + "args": { + "External id": 973013,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936089998.502, "dur": 5.760, + "args": { + "External id": 973014,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936090002.046, "dur": 0.770, + "args": { + "External id": 973015,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936090053.106, "dur": 97.910, + "args": { + "External id": 973016,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936090115.456, "dur": 0.939, + "args": { + "External id": 973017,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936090119.795, "dur": 0.604, + "args": { + "External id": 973018,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936090123.273, "dur": 0.852, + "args": { + "External id": 973019,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936090127.755, "dur": 0.533, + "args": { + "External id": 973020,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936090129.247, "dur": 4.486, + "args": { + "External id": 973021,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936090134.953, "dur": 0.605, + "args": { + "External id": 973022,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936090138.752, "dur": 0.716, + "args": { + "External id": 973023,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936090142.293, "dur": 0.274, + "args": { + "External id": 973024,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936090145.093, "dur": 0.389, + "args": { + "External id": 973025,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936090165.293, "dur": 41.947, + "args": { + "External id": 973026,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936090284.733, "dur": 391.086, + "args": { + "External id": 973027,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936090321.047, "dur": 349.219, + "args": { + "External id": 973028,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12072, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936090335.061, "dur": 328.100, + "args": { + "External id": 973029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936090702.195, "dur": 2.926, + "args": { + "External id": 973030,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12074, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338706, "tid": 2338706, + "ts": 6345936090806.779, "dur": 7902.457, + "args": { + "External id": 973031,"Record function id": 0, "Ev Idx": 12075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936090924.339, "dur": 6.678, + "args": { + "External id": 973032,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936090935.112, "dur": 1.439, + "args": { + "External id": 973033,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936090938.500, "dur": 1.286, + "args": { + "External id": 973034,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936090942.174, "dur": 3.915, + "args": { + "External id": 973035,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936090947.602, "dur": 1.346, + "args": { + "External id": 973036,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936090950.649, "dur": 0.976, + "args": { + "External id": 973037,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936090957.283, "dur": 1.204, + "args": { + "External id": 973038,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936090960.027, "dur": 3.231, + "args": { + "External id": 973039,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936090964.935, "dur": 1.147, + "args": { + "External id": 973040,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936090967.716, "dur": 1.082, + "args": { + "External id": 973041,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936090990.082, "dur": 7662.767, + "args": { + "External id": 973042,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936091046.245, "dur": 7595.076, + "args": { + "External id": 973043,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936091162.116, "dur": 19.547, + "args": { + "External id": 973044,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936091188.537, "dur": 7408.025, + "args": { + "External id": 973045,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936091191.687, "dur": 7403.158, + "args": { + "External id": 973046,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936091197.364, "dur": 10.722, + "args": { + "External id": 973047,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936091210.070, "dur": 7381.177, + "args": { + "External id": 973048,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936098914.282, "dur": 44.057, + "args": { + "External id": 973049,"Sequence number": 10552243, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12093 + } + }, + { + "ph": "s", "id": 223, "pid": 2338706, "tid": 2338706, "ts": 6345936098914.282, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936098936.500, "dur": 15.107, + "args": { + "External id": 973050,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936098944.369, "dur": 6.937, + "args": { + "External id": 973051,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936099117.222, "dur": 120.114, + "args": { + "External id": 973052,"Record function id": 0, "Ev Idx": 12096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936099239.299, "dur": 1396.475, + "args": { + "External id": 973053,"Record function id": 0, "Ev Idx": 12097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936099292.841, "dur": 1326.413, + "args": { + "External id": 973054,"Sequence number": 10552244, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12098 + } + }, + { + "ph": "s", "id": 222, "pid": 2338706, "tid": 2338706, "ts": 6345936099292.841, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936099385.082, "dur": 68.906, + "args": { + "External id": 973055,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936099473.376, "dur": 123.766, + "args": { + "External id": 973056,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936099612.096, "dur": 43.708, + "args": { + "External id": 973057,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936099666.987, "dur": 33.977, + "args": { + "External id": 973058,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936099736.701, "dur": 34.863, + "args": { + "External id": 973059,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936099795.017, "dur": 23.559, + "args": { + "External id": 973060,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936099844.490, "dur": 192.489, + "args": { + "External id": 973061,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936099909.792, "dur": 17.591, + "args": { + "External id": 973062,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936099916.143, "dur": 10.085, + "args": { + "External id": 973063,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936099933.064, "dur": 4.385, + "args": { + "External id": 973064,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936099939.709, "dur": 1.232, + "args": { + "External id": 973065,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936099944.146, "dur": 3.397, + "args": { + "External id": 973066,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936100054.847, "dur": 115.536, + "args": { + "External id": 973067,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936100216.970, "dur": 40.374, + "args": { + "External id": 973068,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936100266.160, "dur": 51.361, + "args": { + "External id": 973069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936100327.005, "dur": 40.409, + "args": { + "External id": 973070,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936100395.941, "dur": 33.682, + "args": { + "External id": 973071,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936100438.708, "dur": 44.154, + "args": { + "External id": 973072,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936100508.445, "dur": 24.575, + "args": { + "External id": 973073,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12117 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 2338706, "tid": 2338706, + "ts": 6345936100709.034, "dur": 101.159, + "args": { + "External id": 973074,"Record function id": 0, "Ev Idx": 12118 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936100898.899, "dur": 52.577, + "args": { + "External id": 973075,"Record function id": 0, "Ev Idx": 12119 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 2338706, "tid": 2338706, + "ts": 6345936100962.622, "dur": 27342.406, + "args": { + "External id": 973076,"Record function id": 0, "Ev Idx": 12120 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338706, "tid": 2338706, + "ts": 6345936100976.281, "dur": 1230.541, + "args": { + "External id": 973077,"Record function id": 0, "Ev Idx": 12121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936101150.350, "dur": 12.390, + "args": { + "External id": 973078,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936101181.326, "dur": 43.821, + "args": { + "External id": 973079,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101187.327, "dur": 4.535, + "args": { + "External id": 973080,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101193.685, "dur": 0.602, + "args": { + "External id": 973081,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101196.972, "dur": 0.350, + "args": { + "External id": 973082,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101200.640, "dur": 0.470, + "args": { + "External id": 973083,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101202.011, "dur": 3.435, + "args": { + "External id": 973084,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101208.223, "dur": 0.354, + "args": { + "External id": 973085,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101210.113, "dur": 0.401, + "args": { + "External id": 973086,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101213.187, "dur": 0.333, + "args": { + "External id": 973087,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101214.342, "dur": 2.669, + "args": { + "External id": 973088,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936101238.435, "dur": 65.106, + "args": { + "External id": 973089,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936101347.191, "dur": 148.376, + "args": { + "External id": 973090,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936101361.808, "dur": 4.539, + "args": { + "External id": 973091,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936101372.737, "dur": 10.930, + "args": { + "External id": 973092,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936101377.704, "dur": 5.449, + "args": { + "External id": 973093,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101380.969, "dur": 0.665, + "args": { + "External id": 973094,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936101391.377, "dur": 34.513, + "args": { + "External id": 973095,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101394.631, "dur": 0.521, + "args": { + "External id": 973096,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101396.352, "dur": 2.884, + "args": { + "External id": 973097,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101400.206, "dur": 2.352, + "args": { + "External id": 973098,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101405.046, "dur": 0.473, + "args": { + "External id": 973099,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101406.425, "dur": 0.482, + "args": { + "External id": 973100,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101412.334, "dur": 0.579, + "args": { + "External id": 973101,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101413.813, "dur": 0.418, + "args": { + "External id": 973102,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101415.038, "dur": 0.563, + "args": { + "External id": 973103,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936101420.151, "dur": 0.433, + "args": { + "External id": 973104,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936101441.168, "dur": 40.918, + "args": { + "External id": 973105,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936101561.042, "dur": 477.693, + "args": { + "External id": 973106,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936101599.389, "dur": 430.860, + "args": { + "External id": 973107,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12151, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936101610.946, "dur": 390.292, + "args": { + "External id": 973108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936102109.710, "dur": 4.923, + "args": { + "External id": 973109,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12153, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338706, "tid": 2338706, + "ts": 6345936102233.315, "dur": 25754.250, + "args": { + "External id": 973110,"Record function id": 0, "Ev Idx": 12154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936102359.450, "dur": 7.852, + "args": { + "External id": 973111,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936102371.591, "dur": 1.286, + "args": { + "External id": 973112,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936102375.031, "dur": 1.110, + "args": { + "External id": 973113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936102378.304, "dur": 1.036, + "args": { + "External id": 973114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936102380.874, "dur": 1.417, + "args": { + "External id": 973115,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936102386.613, "dur": 1.244, + "args": { + "External id": 973116,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936102389.435, "dur": 1.191, + "args": { + "External id": 973117,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936102392.771, "dur": 4.997, + "args": { + "External id": 973118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936102399.497, "dur": 1.050, + "args": { + "External id": 973119,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936102404.862, "dur": 0.842, + "args": { + "External id": 973120,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936102428.032, "dur": 25506.626, + "args": { + "External id": 973121,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936102451.623, "dur": 25472.335, + "args": { + "External id": 973122,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936102480.115, "dur": 17.296, + "args": { + "External id": 973123,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936102502.911, "dur": 25374.837, + "args": { + "External id": 973124,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936102505.922, "dur": 25370.084, + "args": { + "External id": 973125,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936102511.671, "dur": 5.750, + "args": { + "External id": 973126,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936102519.202, "dur": 25353.397, + "args": { + "External id": 973127,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936128235.243, "dur": 38.890, + "args": { + "External id": 973128,"Sequence number": 10552245, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12172 + } + }, + { + "ph": "s", "id": 221, "pid": 2338706, "tid": 2338706, "ts": 6345936128235.243, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936128257.448, "dur": 11.197, + "args": { + "External id": 973129,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936128262.866, "dur": 5.375, + "args": { + "External id": 973130,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936128353.491, "dur": 92.128, + "args": { + "External id": 973131,"Record function id": 0, "Ev Idx": 12175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936128446.990, "dur": 1358.001, + "args": { + "External id": 973132,"Record function id": 0, "Ev Idx": 12176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936128491.261, "dur": 1298.138, + "args": { + "External id": 973133,"Sequence number": 10552246, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12177 + } + }, + { + "ph": "s", "id": 220, "pid": 2338706, "tid": 2338706, "ts": 6345936128491.261, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936128588.939, "dur": 53.531, + "args": { + "External id": 973134,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936128660.468, "dur": 122.440, + "args": { + "External id": 973135,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936128796.697, "dur": 45.073, + "args": { + "External id": 973136,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936128848.722, "dur": 34.901, + "args": { + "External id": 973137,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936128917.691, "dur": 32.918, + "args": { + "External id": 973138,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936128977.272, "dur": 24.766, + "args": { + "External id": 973139,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936129113.786, "dur": 168.839, + "args": { + "External id": 973140,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936129177.582, "dur": 22.024, + "args": { + "External id": 973141,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936129185.689, "dur": 12.690, + "args": { + "External id": 973142,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936129202.693, "dur": 5.159, + "args": { + "External id": 973143,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936129209.217, "dur": 3.204, + "args": { + "External id": 973144,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936129215.044, "dur": 3.529, + "args": { + "External id": 973145,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936129296.774, "dur": 68.041, + "args": { + "External id": 973146,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936129404.409, "dur": 36.608, + "args": { + "External id": 973147,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936129453.109, "dur": 50.584, + "args": { + "External id": 973148,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936129512.351, "dur": 41.245, + "args": { + "External id": 973149,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936129578.053, "dur": 33.560, + "args": { + "External id": 973150,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936129618.097, "dur": 44.200, + "args": { + "External id": 973151,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936129687.047, "dur": 26.957, + "args": { + "External id": 973152,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12196 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 2338706, "tid": 2338706, + "ts": 6345936129878.250, "dur": 92.227, + "args": { + "External id": 973153,"Record function id": 0, "Ev Idx": 12197 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936130136.236, "dur": 59.401, + "args": { + "External id": 973154,"Record function id": 0, "Ev Idx": 12198 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 2338706, "tid": 2338706, + "ts": 6345936130206.901, "dur": 27094.869, + "args": { + "External id": 973155,"Record function id": 0, "Ev Idx": 12199 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338706, "tid": 2338706, + "ts": 6345936130219.111, "dur": 1098.871, + "args": { + "External id": 973156,"Record function id": 0, "Ev Idx": 12200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936130315.645, "dur": 12.217, + "args": { + "External id": 973157,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936130342.974, "dur": 43.839, + "args": { + "External id": 973158,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130349.202, "dur": 2.564, + "args": { + "External id": 973159,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130358.622, "dur": 0.800, + "args": { + "External id": 973160,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130360.323, "dur": 0.612, + "args": { + "External id": 973161,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130361.833, "dur": 0.379, + "args": { + "External id": 973162,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130366.622, "dur": 0.567, + "args": { + "External id": 973163,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130368.267, "dur": 0.418, + "args": { + "External id": 973164,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130371.772, "dur": 4.740, + "args": { + "External id": 973165,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130377.697, "dur": 0.390, + "args": { + "External id": 973166,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130378.875, "dur": 0.527, + "args": { + "External id": 973167,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936130401.123, "dur": 68.504, + "args": { + "External id": 973168,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936130509.130, "dur": 147.069, + "args": { + "External id": 973169,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936130522.926, "dur": 4.523, + "args": { + "External id": 973170,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936130533.616, "dur": 11.869, + "args": { + "External id": 973171,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936130538.795, "dur": 6.195, + "args": { + "External id": 973172,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130542.619, "dur": 0.617, + "args": { + "External id": 973173,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936130554.683, "dur": 37.808, + "args": { + "External id": 973174,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130556.559, "dur": 2.384, + "args": { + "External id": 973175,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130559.984, "dur": 0.604, + "args": { + "External id": 973176,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130564.556, "dur": 0.571, + "args": { + "External id": 973177,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130567.968, "dur": 2.609, + "args": { + "External id": 973178,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130574.019, "dur": 0.394, + "args": { + "External id": 973179,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130575.358, "dur": 0.592, + "args": { + "External id": 973180,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130578.925, "dur": 0.650, + "args": { + "External id": 973181,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130583.593, "dur": 0.477, + "args": { + "External id": 973182,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936130585.192, "dur": 1.970, + "args": { + "External id": 973183,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936130605.589, "dur": 40.751, + "args": { + "External id": 973184,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936130719.197, "dur": 476.821, + "args": { + "External id": 973185,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936130757.847, "dur": 431.771, + "args": { + "External id": 973186,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12230, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936130773.204, "dur": 409.385, + "args": { + "External id": 973187,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936131228.363, "dur": 3.009, + "args": { + "External id": 973188,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12232, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338706, "tid": 2338706, + "ts": 6345936131342.260, "dur": 25659.571, + "args": { + "External id": 973189,"Record function id": 0, "Ev Idx": 12233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936131467.960, "dur": 7.625, + "args": { + "External id": 973190,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936131479.844, "dur": 1.369, + "args": { + "External id": 973191,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936131483.102, "dur": 3.391, + "args": { + "External id": 973192,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936131488.609, "dur": 1.197, + "args": { + "External id": 973193,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936131491.511, "dur": 1.380, + "args": { + "External id": 973194,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936131494.350, "dur": 0.982, + "args": { + "External id": 973195,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936131499.273, "dur": 1.517, + "args": { + "External id": 973196,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936131502.665, "dur": 2.353, + "args": { + "External id": 973197,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936131506.629, "dur": 0.882, + "args": { + "External id": 973198,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936131509.171, "dur": 0.648, + "args": { + "External id": 973199,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936131533.128, "dur": 25415.326, + "args": { + "External id": 973200,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936131551.179, "dur": 25386.200, + "args": { + "External id": 973201,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936131571.304, "dur": 18.403, + "args": { + "External id": 973202,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936131594.250, "dur": 25296.474, + "args": { + "External id": 973203,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936131597.256, "dur": 25291.905, + "args": { + "External id": 973204,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936131603.864, "dur": 6.109, + "args": { + "External id": 973205,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936131611.779, "dur": 25273.094, + "args": { + "External id": 973206,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936157234.966, "dur": 37.633, + "args": { + "External id": 973207,"Sequence number": 10552247, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12251 + } + }, + { + "ph": "s", "id": 219, "pid": 2338706, "tid": 2338706, "ts": 6345936157234.966, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936157254.773, "dur": 12.071, + "args": { + "External id": 973208,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936157260.010, "dur": 6.428, + "args": { + "External id": 973209,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936157346.591, "dur": 87.209, + "args": { + "External id": 973210,"Record function id": 0, "Ev Idx": 12254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936157436.301, "dur": 1349.841, + "args": { + "External id": 973211,"Record function id": 0, "Ev Idx": 12255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936157487.953, "dur": 1280.851, + "args": { + "External id": 973212,"Sequence number": 10552248, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12256 + } + }, + { + "ph": "s", "id": 218, "pid": 2338706, "tid": 2338706, "ts": 6345936157487.953, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936157571.539, "dur": 60.135, + "args": { + "External id": 973213,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936157648.836, "dur": 119.317, + "args": { + "External id": 973214,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936157783.618, "dur": 44.679, + "args": { + "External id": 973215,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936157841.289, "dur": 35.683, + "args": { + "External id": 973216,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936157906.120, "dur": 33.566, + "args": { + "External id": 973217,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936157963.713, "dur": 25.634, + "args": { + "External id": 973218,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936158038.340, "dur": 213.736, + "args": { + "External id": 973219,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936158139.920, "dur": 19.881, + "args": { + "External id": 973220,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936158149.107, "dur": 8.904, + "args": { + "External id": 973221,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936158163.348, "dur": 6.139, + "args": { + "External id": 973222,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936158170.814, "dur": 1.513, + "args": { + "External id": 973223,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936158176.168, "dur": 6.192, + "args": { + "External id": 973224,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936158266.548, "dur": 67.828, + "args": { + "External id": 973225,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936158376.868, "dur": 36.593, + "args": { + "External id": 973226,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936158425.010, "dur": 52.607, + "args": { + "External id": 973227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936158487.032, "dur": 40.969, + "args": { + "External id": 973228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936158552.610, "dur": 36.273, + "args": { + "External id": 973229,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936158597.901, "dur": 41.261, + "args": { + "External id": 973230,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936158663.244, "dur": 22.667, + "args": { + "External id": 973231,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12275 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 2338706, "tid": 2338706, + "ts": 6345936158859.497, "dur": 97.218, + "args": { + "External id": 973232,"Record function id": 0, "Ev Idx": 12276 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936159120.606, "dur": 63.073, + "args": { + "External id": 973233,"Record function id": 0, "Ev Idx": 12277 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 2338706, "tid": 2338706, + "ts": 6345936159195.503, "dur": 28070.119, + "args": { + "External id": 973234,"Record function id": 0, "Ev Idx": 12278 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338706, "tid": 2338706, + "ts": 6345936159208.353, "dur": 1091.655, + "args": { + "External id": 973235,"Record function id": 0, "Ev Idx": 12279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936159307.230, "dur": 12.600, + "args": { + "External id": 973236,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936159336.925, "dur": 46.020, + "args": { + "External id": 973237,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159343.232, "dur": 2.538, + "args": { + "External id": 973238,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159352.382, "dur": 0.459, + "args": { + "External id": 973239,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159353.796, "dur": 0.670, + "args": { + "External id": 973240,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159357.906, "dur": 0.663, + "args": { + "External id": 973241,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159361.908, "dur": 0.783, + "args": { + "External id": 973242,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159363.477, "dur": 0.810, + "args": { + "External id": 973243,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159367.369, "dur": 4.705, + "args": { + "External id": 973244,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159373.189, "dur": 0.533, + "args": { + "External id": 973245,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159374.555, "dur": 0.731, + "args": { + "External id": 973246,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936159398.037, "dur": 67.187, + "args": { + "External id": 973247,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936159509.695, "dur": 162.518, + "args": { + "External id": 973248,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936159524.480, "dur": 5.996, + "args": { + "External id": 973249,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936159537.147, "dur": 14.224, + "args": { + "External id": 973250,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936159542.214, "dur": 8.634, + "args": { + "External id": 973251,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159548.412, "dur": 0.827, + "args": { + "External id": 973252,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936159559.356, "dur": 34.277, + "args": { + "External id": 973253,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159560.844, "dur": 0.451, + "args": { + "External id": 973254,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159565.568, "dur": 2.062, + "args": { + "External id": 973255,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159568.431, "dur": 0.575, + "args": { + "External id": 973256,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159569.739, "dur": 2.935, + "args": { + "External id": 973257,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159578.278, "dur": 0.254, + "args": { + "External id": 973258,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159579.494, "dur": 0.498, + "args": { + "External id": 973259,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159580.751, "dur": 0.444, + "args": { + "External id": 973260,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159585.555, "dur": 0.579, + "args": { + "External id": 973261,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936159586.847, "dur": 0.569, + "args": { + "External id": 973262,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936159620.170, "dur": 41.393, + "args": { + "External id": 973263,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936159736.471, "dur": 441.974, + "args": { + "External id": 973264,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936159783.094, "dur": 388.788, + "args": { + "External id": 973265,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12309, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936159795.845, "dur": 369.107, + "args": { + "External id": 973266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936160209.240, "dur": 3.233, + "args": { + "External id": 973267,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12311, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338706, "tid": 2338706, + "ts": 6345936160323.943, "dur": 26631.198, + "args": { + "External id": 973268,"Record function id": 0, "Ev Idx": 12312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936160441.949, "dur": 7.822, + "args": { + "External id": 973269,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936160453.723, "dur": 1.498, + "args": { + "External id": 973270,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936160457.033, "dur": 4.500, + "args": { + "External id": 973271,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936160463.580, "dur": 1.334, + "args": { + "External id": 973272,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936160466.536, "dur": 0.987, + "args": { + "External id": 973273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936160469.089, "dur": 1.100, + "args": { + "External id": 973274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936160473.854, "dur": 1.089, + "args": { + "External id": 973275,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936160476.771, "dur": 2.538, + "args": { + "External id": 973276,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936160480.741, "dur": 1.044, + "args": { + "External id": 973277,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936160485.899, "dur": 1.018, + "args": { + "External id": 973278,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936160508.540, "dur": 26391.174, + "args": { + "External id": 973279,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936160526.908, "dur": 26362.850, + "args": { + "External id": 973280,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936160548.340, "dur": 19.669, + "args": { + "External id": 973281,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936160573.115, "dur": 26273.056, + "args": { + "External id": 973282,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936160576.235, "dur": 26268.426, + "args": { + "External id": 973283,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936160584.675, "dur": 6.992, + "args": { + "External id": 973284,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936160593.384, "dur": 26247.325, + "args": { + "External id": 973285,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936187188.395, "dur": 45.619, + "args": { + "External id": 973286,"Sequence number": 10552249, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12330 + } + }, + { + "ph": "s", "id": 217, "pid": 2338706, "tid": 2338706, "ts": 6345936187188.395, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936187212.013, "dur": 16.337, + "args": { + "External id": 973287,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936187221.116, "dur": 6.773, + "args": { + "External id": 973288,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936187313.169, "dur": 87.607, + "args": { + "External id": 973289,"Record function id": 0, "Ev Idx": 12333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936187405.012, "dur": 1323.387, + "args": { + "External id": 973290,"Record function id": 0, "Ev Idx": 12334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936187451.059, "dur": 1261.068, + "args": { + "External id": 973291,"Sequence number": 10552250, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12335 + } + }, + { + "ph": "s", "id": 216, "pid": 2338706, "tid": 2338706, "ts": 6345936187451.059, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936187528.616, "dur": 60.135, + "args": { + "External id": 973292,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936187604.222, "dur": 120.776, + "args": { + "External id": 973293,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936187744.251, "dur": 44.357, + "args": { + "External id": 973294,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936187795.459, "dur": 33.963, + "args": { + "External id": 973295,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936187861.437, "dur": 33.868, + "args": { + "External id": 973296,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936187920.025, "dur": 21.503, + "args": { + "External id": 973297,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936187970.437, "dur": 234.231, + "args": { + "External id": 973298,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936188092.819, "dur": 18.805, + "args": { + "External id": 973299,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936188100.553, "dur": 8.580, + "args": { + "External id": 973300,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936188114.544, "dur": 4.927, + "args": { + "External id": 973301,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936188120.813, "dur": 0.986, + "args": { + "External id": 973302,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936188126.740, "dur": 5.912, + "args": { + "External id": 973303,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936188220.247, "dur": 68.741, + "args": { + "External id": 973304,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936188334.469, "dur": 38.864, + "args": { + "External id": 973305,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936188384.806, "dur": 50.083, + "args": { + "External id": 973306,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936188443.451, "dur": 40.816, + "args": { + "External id": 973307,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936188508.987, "dur": 32.102, + "args": { + "External id": 973308,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936188548.350, "dur": 40.810, + "args": { + "External id": 973309,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936188611.821, "dur": 22.449, + "args": { + "External id": 973310,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12354 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 2338706, "tid": 2338706, + "ts": 6345936188802.852, "dur": 97.346, + "args": { + "External id": 973311,"Record function id": 0, "Ev Idx": 12355 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936188988.383, "dur": 125.726, + "args": { + "External id": 973312,"Record function id": 0, "Ev Idx": 12356 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 2338706, "tid": 2338706, + "ts": 6345936189177.531, "dur": 31665.086, + "args": { + "External id": 973313,"Record function id": 0, "Ev Idx": 12357 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338706, "tid": 2338706, + "ts": 6345936189196.045, "dur": 1121.651, + "args": { + "External id": 973314,"Record function id": 0, "Ev Idx": 12358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936189299.042, "dur": 12.646, + "args": { + "External id": 973315,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936189329.412, "dur": 44.941, + "args": { + "External id": 973316,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189338.210, "dur": 2.414, + "args": { + "External id": 973317,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189344.829, "dur": 0.581, + "args": { + "External id": 973318,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189346.116, "dur": 0.486, + "args": { + "External id": 973319,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189349.889, "dur": 2.056, + "args": { + "External id": 973320,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189352.627, "dur": 0.526, + "args": { + "External id": 973321,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189356.252, "dur": 0.385, + "args": { + "External id": 973322,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189359.196, "dur": 2.896, + "args": { + "External id": 973323,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189362.682, "dur": 0.437, + "args": { + "External id": 973324,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189366.251, "dur": 0.289, + "args": { + "External id": 973325,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936189386.830, "dur": 67.895, + "args": { + "External id": 973326,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936189493.102, "dur": 149.371, + "args": { + "External id": 973327,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936189507.946, "dur": 4.559, + "args": { + "External id": 973328,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936189518.874, "dur": 15.150, + "args": { + "External id": 973329,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936189523.963, "dur": 9.522, + "args": { + "External id": 973330,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189529.563, "dur": 2.526, + "args": { + "External id": 973331,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936189542.216, "dur": 33.387, + "args": { + "External id": 973332,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189543.689, "dur": 0.384, + "args": { + "External id": 973333,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189547.277, "dur": 0.711, + "args": { + "External id": 973334,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189548.936, "dur": 0.480, + "args": { + "External id": 973335,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189555.314, "dur": 2.716, + "args": { + "External id": 973336,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189558.916, "dur": 0.363, + "args": { + "External id": 973337,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189560.070, "dur": 1.866, + "args": { + "External id": 973338,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189565.081, "dur": 0.423, + "args": { + "External id": 973339,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189566.081, "dur": 0.287, + "args": { + "External id": 973340,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936189569.109, "dur": 0.343, + "args": { + "External id": 973341,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936189591.576, "dur": 40.328, + "args": { + "External id": 973342,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936189705.083, "dur": 493.575, + "args": { + "External id": 973343,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936189744.856, "dur": 446.841, + "args": { + "External id": 973344,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12388, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936189756.158, "dur": 428.443, + "args": { + "External id": 973345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936190228.058, "dur": 2.954, + "args": { + "External id": 973346,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12390, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338706, "tid": 2338706, + "ts": 6345936190343.094, "dur": 30238.154, + "args": { + "External id": 973347,"Record function id": 0, "Ev Idx": 12391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936190464.865, "dur": 7.573, + "args": { + "External id": 973348,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936190476.208, "dur": 1.044, + "args": { + "External id": 973349,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936190479.532, "dur": 3.669, + "args": { + "External id": 973350,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936190485.099, "dur": 1.274, + "args": { + "External id": 973351,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936190487.907, "dur": 1.072, + "args": { + "External id": 973352,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936190490.398, "dur": 0.909, + "args": { + "External id": 973353,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936190495.249, "dur": 0.970, + "args": { + "External id": 973354,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936190500.421, "dur": 3.245, + "args": { + "External id": 973355,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936190505.134, "dur": 1.029, + "args": { + "External id": 973356,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936190507.752, "dur": 1.031, + "args": { + "External id": 973357,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936190530.784, "dur": 29997.724, + "args": { + "External id": 973358,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936190551.733, "dur": 29966.750, + "args": { + "External id": 973359,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936190574.186, "dur": 20.306, + "args": { + "External id": 973360,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936190599.026, "dur": 29872.001, + "args": { + "External id": 973361,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936190602.048, "dur": 29867.202, + "args": { + "External id": 973362,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936190607.203, "dur": 7.112, + "args": { + "External id": 973363,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936190616.333, "dur": 29849.341, + "args": { + "External id": 973364,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936220763.698, "dur": 43.543, + "args": { + "External id": 973365,"Sequence number": 10552251, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12409 + } + }, + { + "ph": "s", "id": 215, "pid": 2338706, "tid": 2338706, "ts": 6345936220763.698, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936220786.134, "dur": 14.383, + "args": { + "External id": 973366,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936220794.986, "dur": 5.237, + "args": { + "External id": 973367,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936220897.699, "dur": 86.977, + "args": { + "External id": 973368,"Record function id": 0, "Ev Idx": 12412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936220986.330, "dur": 1427.833, + "args": { + "External id": 973369,"Record function id": 0, "Ev Idx": 12413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936221095.370, "dur": 1300.575, + "args": { + "External id": 973370,"Sequence number": 10552252, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12414 + } + }, + { + "ph": "s", "id": 214, "pid": 2338706, "tid": 2338706, "ts": 6345936221095.370, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936221189.453, "dur": 64.394, + "args": { + "External id": 973371,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936221273.030, "dur": 119.340, + "args": { + "External id": 973372,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936221408.778, "dur": 45.762, + "args": { + "External id": 973373,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936221464.229, "dur": 34.090, + "args": { + "External id": 973374,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936221532.241, "dur": 33.618, + "args": { + "External id": 973375,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936221592.180, "dur": 21.689, + "args": { + "External id": 973376,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936221640.738, "dur": 164.848, + "args": { + "External id": 973377,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936221701.314, "dur": 15.025, + "args": { + "External id": 973378,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936221708.022, "dur": 7.092, + "args": { + "External id": 973379,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936221720.599, "dur": 5.520, + "args": { + "External id": 973380,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936221729.950, "dur": 1.304, + "args": { + "External id": 973381,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936221734.326, "dur": 8.189, + "args": { + "External id": 973382,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936221819.300, "dur": 56.805, + "args": { + "External id": 973383,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936221912.479, "dur": 35.539, + "args": { + "External id": 973384,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936221958.320, "dur": 69.263, + "args": { + "External id": 973385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936222043.792, "dur": 90.212, + "args": { + "External id": 973386,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936222168.933, "dur": 36.050, + "args": { + "External id": 973387,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936222214.870, "dur": 47.077, + "args": { + "External id": 973388,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936222286.005, "dur": 22.985, + "args": { + "External id": 973389,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12433 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 2338706, "tid": 2338706, + "ts": 6345936222491.750, "dur": 93.687, + "args": { + "External id": 973390,"Record function id": 0, "Ev Idx": 12434 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936222672.610, "dur": 54.586, + "args": { + "External id": 973391,"Record function id": 0, "Ev Idx": 12435 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 2338706, "tid": 2338706, + "ts": 6345936222737.864, "dur": 30518.263, + "args": { + "External id": 973392,"Record function id": 0, "Ev Idx": 12436 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338706, "tid": 2338706, + "ts": 6345936222751.523, "dur": 1195.125, + "args": { + "External id": 973393,"Record function id": 0, "Ev Idx": 12437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936222843.291, "dur": 10.697, + "args": { + "External id": 973394,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936222869.798, "dur": 45.792, + "args": { + "External id": 973395,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936222878.184, "dur": 2.427, + "args": { + "External id": 973396,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936222884.085, "dur": 0.671, + "args": { + "External id": 973397,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936222888.152, "dur": 0.447, + "args": { + "External id": 973398,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936222889.414, "dur": 0.437, + "args": { + "External id": 973399,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936222892.039, "dur": 0.482, + "args": { + "External id": 973400,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936222896.265, "dur": 0.541, + "args": { + "External id": 973401,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936222897.738, "dur": 4.196, + "args": { + "External id": 973402,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936222902.751, "dur": 0.594, + "args": { + "External id": 973403,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936222907.245, "dur": 0.494, + "args": { + "External id": 973404,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936222928.981, "dur": 64.016, + "args": { + "External id": 973405,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936223096.536, "dur": 171.089, + "args": { + "External id": 973406,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936223116.300, "dur": 7.053, + "args": { + "External id": 973407,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936223130.265, "dur": 12.307, + "args": { + "External id": 973408,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936223135.512, "dur": 6.541, + "args": { + "External id": 973409,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936223139.174, "dur": 0.932, + "args": { + "External id": 973410,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936223151.591, "dur": 36.475, + "args": { + "External id": 973411,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936223156.796, "dur": 2.029, + "args": { + "External id": 973412,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936223159.814, "dur": 0.859, + "args": { + "External id": 973413,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936223161.793, "dur": 0.513, + "args": { + "External id": 973414,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936223167.290, "dur": 3.031, + "args": { + "External id": 973415,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936223171.101, "dur": 0.281, + "args": { + "External id": 973416,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936223171.990, "dur": 0.534, + "args": { + "External id": 973417,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936223175.750, "dur": 0.407, + "args": { + "External id": 973418,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936223176.788, "dur": 0.604, + "args": { + "External id": 973419,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936223181.214, "dur": 1.715, + "args": { + "External id": 973420,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936223206.796, "dur": 50.051, + "args": { + "External id": 973421,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936223337.041, "dur": 495.890, + "args": { + "External id": 973422,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936223376.841, "dur": 449.747, + "args": { + "External id": 973423,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12467, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936223389.570, "dur": 429.771, + "args": { + "External id": 973424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936223859.260, "dur": 2.903, + "args": { + "External id": 973425,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12469, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338706, "tid": 2338706, + "ts": 6345936223971.500, "dur": 28966.289, + "args": { + "External id": 973426,"Record function id": 0, "Ev Idx": 12470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936224162.549, "dur": 8.239, + "args": { + "External id": 973427,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936224175.859, "dur": 1.416, + "args": { + "External id": 973428,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936224179.530, "dur": 3.612, + "args": { + "External id": 973429,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936224185.429, "dur": 1.371, + "args": { + "External id": 973430,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936224188.428, "dur": 1.181, + "args": { + "External id": 973431,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936224194.049, "dur": 1.099, + "args": { + "External id": 973432,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936224199.410, "dur": 1.172, + "args": { + "External id": 973433,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936224202.165, "dur": 2.904, + "args": { + "External id": 973434,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936224206.767, "dur": 1.106, + "args": { + "External id": 973435,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936224211.713, "dur": 0.968, + "args": { + "External id": 973436,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936224233.839, "dur": 28646.198, + "args": { + "External id": 973437,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936224263.193, "dur": 28606.382, + "args": { + "External id": 973438,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936224281.309, "dur": 20.240, + "args": { + "External id": 973439,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936224305.928, "dur": 28519.472, + "args": { + "External id": 973440,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936224310.133, "dur": 28513.449, + "args": { + "External id": 973441,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936224316.408, "dur": 5.979, + "args": { + "External id": 973442,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936224324.142, "dur": 28496.012, + "args": { + "External id": 973443,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936253172.889, "dur": 43.048, + "args": { + "External id": 973444,"Sequence number": 10552253, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12488 + } + }, + { + "ph": "s", "id": 213, "pid": 2338706, "tid": 2338706, "ts": 6345936253172.889, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936253194.022, "dur": 14.949, + "args": { + "External id": 973445,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936253202.028, "dur": 6.573, + "args": { + "External id": 973446,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936253311.672, "dur": 90.257, + "args": { + "External id": 973447,"Record function id": 0, "Ev Idx": 12491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936253403.420, "dur": 1349.237, + "args": { + "External id": 973448,"Record function id": 0, "Ev Idx": 12492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936253448.375, "dur": 1288.273, + "args": { + "External id": 973449,"Sequence number": 10552254, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12493 + } + }, + { + "ph": "s", "id": 212, "pid": 2338706, "tid": 2338706, "ts": 6345936253448.375, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936253538.994, "dur": 62.322, + "args": { + "External id": 973450,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936253619.065, "dur": 119.479, + "args": { + "External id": 973451,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936253754.516, "dur": 42.518, + "args": { + "External id": 973452,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936253809.048, "dur": 34.645, + "args": { + "External id": 973453,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936253877.930, "dur": 32.979, + "args": { + "External id": 973454,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936253933.640, "dur": 19.855, + "args": { + "External id": 973455,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936253978.522, "dur": 245.226, + "args": { + "External id": 973456,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936254095.856, "dur": 16.504, + "args": { + "External id": 973457,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936254102.598, "dur": 8.569, + "args": { + "External id": 973458,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936254119.532, "dur": 7.668, + "args": { + "External id": 973459,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936254128.542, "dur": 2.279, + "args": { + "External id": 973460,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936254143.417, "dur": 5.661, + "args": { + "External id": 973461,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936254240.927, "dur": 66.593, + "args": { + "External id": 973462,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936254350.848, "dur": 38.313, + "args": { + "External id": 973463,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936254401.413, "dur": 51.834, + "args": { + "External id": 973464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936254461.246, "dur": 40.236, + "args": { + "External id": 973465,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936254526.919, "dur": 33.873, + "args": { + "External id": 973466,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936254567.072, "dur": 44.402, + "args": { + "External id": 973467,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936254634.127, "dur": 22.629, + "args": { + "External id": 973468,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12512 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 2338706, "tid": 2338706, + "ts": 6345936254827.582, "dur": 98.760, + "args": { + "External id": 973469,"Record function id": 0, "Ev Idx": 12513 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936255040.267, "dur": 102.030, + "args": { + "External id": 973470,"Record function id": 0, "Ev Idx": 12514 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 2338706, "tid": 2338706, + "ts": 6345936255155.350, "dur": 33191.906, + "args": { + "External id": 973471,"Record function id": 0, "Ev Idx": 12515 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338706, "tid": 2338706, + "ts": 6345936255169.008, "dur": 1119.408, + "args": { + "External id": 973472,"Record function id": 0, "Ev Idx": 12516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936255267.211, "dur": 12.508, + "args": { + "External id": 973473,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936255300.125, "dur": 45.334, + "args": { + "External id": 973474,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255306.150, "dur": 2.942, + "args": { + "External id": 973475,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255313.391, "dur": 0.414, + "args": { + "External id": 973476,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255317.465, "dur": 0.678, + "args": { + "External id": 973477,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255318.985, "dur": 0.533, + "args": { + "External id": 973478,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255321.928, "dur": 0.496, + "args": { + "External id": 973479,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255325.847, "dur": 0.726, + "args": { + "External id": 973480,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255327.197, "dur": 4.307, + "args": { + "External id": 973481,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255334.724, "dur": 0.555, + "args": { + "External id": 973482,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255336.271, "dur": 0.667, + "args": { + "External id": 973483,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936255358.253, "dur": 69.311, + "args": { + "External id": 973484,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936255470.164, "dur": 150.963, + "args": { + "External id": 973485,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936255484.606, "dur": 4.463, + "args": { + "External id": 973486,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936255495.022, "dur": 10.669, + "args": { + "External id": 973487,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936255499.863, "dur": 5.328, + "args": { + "External id": 973488,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255503.227, "dur": 0.579, + "args": { + "External id": 973489,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936255512.779, "dur": 39.613, + "args": { + "External id": 973490,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255516.790, "dur": 2.401, + "args": { + "External id": 973491,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255520.293, "dur": 0.714, + "args": { + "External id": 973492,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255522.018, "dur": 0.697, + "args": { + "External id": 973493,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255528.979, "dur": 2.673, + "args": { + "External id": 973494,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255532.373, "dur": 0.291, + "args": { + "External id": 973495,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255535.973, "dur": 0.539, + "args": { + "External id": 973496,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255538.492, "dur": 0.295, + "args": { + "External id": 973497,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255539.715, "dur": 0.561, + "args": { + "External id": 973498,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936255543.612, "dur": 1.968, + "args": { + "External id": 973499,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936255568.598, "dur": 41.660, + "args": { + "External id": 973500,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936255682.194, "dur": 483.906, + "args": { + "External id": 973501,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936255721.565, "dur": 437.505, + "args": { + "External id": 973502,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12546, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936255732.703, "dur": 417.339, + "args": { + "External id": 973503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936256197.643, "dur": 3.041, + "args": { + "External id": 973504,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12548, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338706, "tid": 2338706, + "ts": 6345936256312.573, "dur": 31780.876, + "args": { + "External id": 973505,"Record function id": 0, "Ev Idx": 12549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936256430.197, "dur": 7.291, + "args": { + "External id": 973506,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936256441.425, "dur": 1.183, + "args": { + "External id": 973507,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936256444.818, "dur": 3.445, + "args": { + "External id": 973508,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936256450.254, "dur": 1.394, + "args": { + "External id": 973509,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936256453.278, "dur": 1.031, + "args": { + "External id": 973510,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936256455.820, "dur": 1.094, + "args": { + "External id": 973511,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936256461.846, "dur": 1.097, + "args": { + "External id": 973512,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936256464.759, "dur": 3.936, + "args": { + "External id": 973513,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936256470.368, "dur": 1.019, + "args": { + "External id": 973514,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936256472.993, "dur": 0.849, + "args": { + "External id": 973515,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936256499.389, "dur": 31493.945, + "args": { + "External id": 973516,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936256518.399, "dur": 31464.207, + "args": { + "External id": 973517,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936256540.831, "dur": 19.803, + "args": { + "External id": 973518,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936256565.378, "dur": 31376.065, + "args": { + "External id": 973519,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936256568.466, "dur": 31371.199, + "args": { + "External id": 973520,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936256574.371, "dur": 8.238, + "args": { + "External id": 973521,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936256584.983, "dur": 31351.145, + "args": { + "External id": 973522,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936288272.043, "dur": 43.478, + "args": { + "External id": 973523,"Sequence number": 10552255, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12567 + } + }, + { + "ph": "s", "id": 211, "pid": 2338706, "tid": 2338706, "ts": 6345936288272.043, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936288297.055, "dur": 11.474, + "args": { + "External id": 973524,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936288301.990, "dur": 6.255, + "args": { + "External id": 973525,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936288402.427, "dur": 91.595, + "args": { + "External id": 973526,"Record function id": 0, "Ev Idx": 12570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936288495.498, "dur": 1333.394, + "args": { + "External id": 973527,"Record function id": 0, "Ev Idx": 12571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936288542.648, "dur": 1267.048, + "args": { + "External id": 973528,"Sequence number": 10552256, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12572 + } + }, + { + "ph": "s", "id": 210, "pid": 2338706, "tid": 2338706, "ts": 6345936288542.648, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936288633.159, "dur": 60.338, + "args": { + "External id": 973529,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936288711.340, "dur": 119.585, + "args": { + "External id": 973530,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936288845.584, "dur": 45.589, + "args": { + "External id": 973531,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936288901.981, "dur": 38.607, + "args": { + "External id": 973532,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936288972.477, "dur": 29.179, + "args": { + "External id": 973533,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936289050.110, "dur": 67.008, + "args": { + "External id": 973534,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936289146.940, "dur": 158.754, + "args": { + "External id": 973535,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936289205.689, "dur": 17.775, + "args": { + "External id": 973536,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936289214.850, "dur": 7.707, + "args": { + "External id": 973537,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936289227.173, "dur": 4.368, + "args": { + "External id": 973538,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936289232.687, "dur": 1.449, + "args": { + "External id": 973539,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936289236.721, "dur": 6.011, + "args": { + "External id": 973540,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936289319.340, "dur": 65.757, + "args": { + "External id": 973541,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936289427.145, "dur": 36.814, + "args": { + "External id": 973542,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936289474.677, "dur": 51.704, + "args": { + "External id": 973543,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936289534.812, "dur": 40.256, + "args": { + "External id": 973544,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936289599.059, "dur": 31.363, + "args": { + "External id": 973545,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936289638.209, "dur": 41.359, + "args": { + "External id": 973546,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936289703.092, "dur": 20.543, + "args": { + "External id": 973547,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12591 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 2338706, "tid": 2338706, + "ts": 6345936289905.938, "dur": 95.119, + "args": { + "External id": 973548,"Record function id": 0, "Ev Idx": 12592 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936290159.747, "dur": 59.792, + "args": { + "External id": 973549,"Record function id": 0, "Ev Idx": 12593 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 2338706, "tid": 2338706, + "ts": 6345936290231.223, "dur": 30711.603, + "args": { + "External id": 973550,"Record function id": 0, "Ev Idx": 12594 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338706, "tid": 2338706, + "ts": 6345936290244.016, "dur": 1127.607, + "args": { + "External id": 973551,"Record function id": 0, "Ev Idx": 12595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936290341.920, "dur": 12.012, + "args": { + "External id": 973552,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936290370.129, "dur": 46.089, + "args": { + "External id": 973553,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290376.372, "dur": 2.385, + "args": { + "External id": 973554,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290385.101, "dur": 0.556, + "args": { + "External id": 973555,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290386.617, "dur": 0.444, + "args": { + "External id": 973556,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290388.049, "dur": 0.592, + "args": { + "External id": 973557,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290392.958, "dur": 0.464, + "args": { + "External id": 973558,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290394.244, "dur": 0.476, + "args": { + "External id": 973559,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290398.021, "dur": 4.898, + "args": { + "External id": 973560,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290403.772, "dur": 0.531, + "args": { + "External id": 973561,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290405.030, "dur": 0.523, + "args": { + "External id": 973562,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936290434.039, "dur": 68.487, + "args": { + "External id": 973563,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936290542.261, "dur": 152.571, + "args": { + "External id": 973564,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936290557.471, "dur": 4.817, + "args": { + "External id": 973565,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936290568.352, "dur": 11.766, + "args": { + "External id": 973566,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936290573.601, "dur": 6.014, + "args": { + "External id": 973567,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290577.225, "dur": 0.863, + "args": { + "External id": 973568,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936290589.952, "dur": 34.300, + "args": { + "External id": 973569,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290591.605, "dur": 1.953, + "args": { + "External id": 973570,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290594.300, "dur": 0.401, + "args": { + "External id": 973571,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290597.572, "dur": 0.446, + "args": { + "External id": 973572,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290600.199, "dur": 2.953, + "args": { + "External id": 973573,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290606.246, "dur": 0.461, + "args": { + "External id": 973574,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290607.301, "dur": 0.400, + "args": { + "External id": 973575,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290610.597, "dur": 0.273, + "args": { + "External id": 973576,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290613.741, "dur": 0.371, + "args": { + "External id": 973577,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936290615.378, "dur": 1.861, + "args": { + "External id": 973578,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936290641.604, "dur": 42.685, + "args": { + "External id": 973579,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936290757.878, "dur": 491.109, + "args": { + "External id": 973580,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936290796.175, "dur": 446.255, + "args": { + "External id": 973581,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12625, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936290809.780, "dur": 425.282, + "args": { + "External id": 973582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936291280.961, "dur": 2.826, + "args": { + "External id": 973583,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12627, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338706, "tid": 2338706, + "ts": 6345936291396.561, "dur": 29300.169, + "args": { + "External id": 973584,"Record function id": 0, "Ev Idx": 12628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936291517.448, "dur": 7.440, + "args": { + "External id": 973585,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936291528.768, "dur": 1.424, + "args": { + "External id": 973586,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936291532.136, "dur": 3.852, + "args": { + "External id": 973587,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936291548.633, "dur": 1.347, + "args": { + "External id": 973588,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936291551.735, "dur": 1.694, + "args": { + "External id": 973589,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936291554.934, "dur": 1.514, + "args": { + "External id": 973590,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936291560.736, "dur": 1.402, + "args": { + "External id": 973591,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936291565.777, "dur": 3.750, + "args": { + "External id": 973592,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936291571.085, "dur": 0.965, + "args": { + "External id": 973593,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936291573.751, "dur": 0.706, + "args": { + "External id": 973594,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936291598.261, "dur": 29043.113, + "args": { + "External id": 973595,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936291617.521, "dur": 29013.539, + "args": { + "External id": 973596,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936291634.274, "dur": 19.819, + "args": { + "External id": 973597,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936291658.162, "dur": 28929.275, + "args": { + "External id": 973598,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936291661.053, "dur": 28925.625, + "args": { + "External id": 973599,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936291666.548, "dur": 7.868, + "args": { + "External id": 973600,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936291676.525, "dur": 28905.446, + "args": { + "External id": 973601,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936320873.370, "dur": 40.259, + "args": { + "External id": 973602,"Sequence number": 10552257, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12646 + } + }, + { + "ph": "s", "id": 209, "pid": 2338706, "tid": 2338706, "ts": 6345936320873.370, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936320894.377, "dur": 13.827, + "args": { + "External id": 973603,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936320901.890, "dur": 6.081, + "args": { + "External id": 973604,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936320990.125, "dur": 142.664, + "args": { + "External id": 973605,"Record function id": 0, "Ev Idx": 12649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936321136.096, "dur": 1325.750, + "args": { + "External id": 973606,"Record function id": 0, "Ev Idx": 12650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936321190.632, "dur": 1254.021, + "args": { + "External id": 973607,"Sequence number": 10552258, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12651 + } + }, + { + "ph": "s", "id": 208, "pid": 2338706, "tid": 2338706, "ts": 6345936321190.632, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936321275.759, "dur": 63.254, + "args": { + "External id": 973608,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936321356.310, "dur": 116.785, + "args": { + "External id": 973609,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936321490.074, "dur": 43.249, + "args": { + "External id": 973610,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936321542.414, "dur": 33.401, + "args": { + "External id": 973611,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936321604.176, "dur": 31.889, + "args": { + "External id": 973612,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936321661.588, "dur": 19.788, + "args": { + "External id": 973613,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936321709.227, "dur": 158.033, + "args": { + "External id": 973614,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936321766.522, "dur": 14.588, + "args": { + "External id": 973615,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936321773.201, "dur": 6.980, + "args": { + "External id": 973616,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936321785.249, "dur": 4.298, + "args": { + "External id": 973617,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936321793.432, "dur": 1.275, + "args": { + "External id": 973618,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936321797.440, "dur": 8.003, + "args": { + "External id": 973619,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936321880.248, "dur": 55.431, + "args": { + "External id": 973620,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936321970.706, "dur": 32.639, + "args": { + "External id": 973621,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936322037.661, "dur": 95.597, + "args": { + "External id": 973622,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936322148.247, "dur": 43.234, + "args": { + "External id": 973623,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936322230.742, "dur": 36.920, + "args": { + "External id": 973624,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936322277.043, "dur": 44.895, + "args": { + "External id": 973625,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936322342.601, "dur": 23.891, + "args": { + "External id": 973626,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12670 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 2338706, "tid": 2338706, + "ts": 6345936322538.770, "dur": 91.750, + "args": { + "External id": 973627,"Record function id": 0, "Ev Idx": 12671 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936322715.863, "dur": 53.551, + "args": { + "External id": 973628,"Record function id": 0, "Ev Idx": 12672 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 2338706, "tid": 2338706, + "ts": 6345936322780.660, "dur": 32344.966, + "args": { + "External id": 973629,"Record function id": 0, "Ev Idx": 12673 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338706, "tid": 2338706, + "ts": 6345936322794.217, "dur": 1087.240, + "args": { + "External id": 973630,"Record function id": 0, "Ev Idx": 12674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936322890.039, "dur": 8.909, + "args": { + "External id": 973631,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936322914.020, "dur": 46.695, + "args": { + "External id": 973632,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936322922.119, "dur": 2.541, + "args": { + "External id": 973633,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936322928.589, "dur": 0.482, + "args": { + "External id": 973634,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936322932.618, "dur": 0.553, + "args": { + "External id": 973635,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936322934.039, "dur": 0.490, + "args": { + "External id": 973636,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936322937.150, "dur": 0.646, + "args": { + "External id": 973637,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936322941.351, "dur": 0.599, + "args": { + "External id": 973638,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936322942.926, "dur": 3.968, + "args": { + "External id": 973639,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936322947.809, "dur": 0.448, + "args": { + "External id": 973640,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936322951.879, "dur": 0.287, + "args": { + "External id": 973641,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936322973.690, "dur": 119.084, + "args": { + "External id": 973642,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936323138.970, "dur": 156.580, + "args": { + "External id": 973643,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936323156.908, "dur": 7.194, + "args": { + "External id": 973644,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936323170.459, "dur": 15.173, + "args": { + "External id": 973645,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936323178.375, "dur": 6.759, + "args": { + "External id": 973646,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936323182.348, "dur": 1.022, + "args": { + "External id": 973647,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936323194.020, "dur": 36.871, + "args": { + "External id": 973648,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936323198.536, "dur": 1.992, + "args": { + "External id": 973649,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936323201.401, "dur": 0.483, + "args": { + "External id": 973650,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936323202.591, "dur": 0.639, + "args": { + "External id": 973651,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936323209.020, "dur": 2.905, + "args": { + "External id": 973652,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936323212.706, "dur": 0.518, + "args": { + "External id": 973653,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936323214.087, "dur": 0.308, + "args": { + "External id": 973654,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936323218.898, "dur": 0.340, + "args": { + "External id": 973655,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936323219.898, "dur": 0.326, + "args": { + "External id": 973656,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936323223.424, "dur": 2.167, + "args": { + "External id": 973657,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936323244.258, "dur": 40.353, + "args": { + "External id": 973658,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936323362.251, "dur": 411.390, + "args": { + "External id": 973659,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936323400.368, "dur": 367.502, + "args": { + "External id": 973660,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12704, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936323412.247, "dur": 349.583, + "args": { + "External id": 973661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936323798.373, "dur": 2.840, + "args": { + "External id": 973662,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12706, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338706, "tid": 2338706, + "ts": 6345936323907.138, "dur": 30927.910, + "args": { + "External id": 973663,"Record function id": 0, "Ev Idx": 12707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936324050.312, "dur": 45.475, + "args": { + "External id": 973664,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936324104.317, "dur": 1.592, + "args": { + "External id": 973665,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936324108.200, "dur": 3.253, + "args": { + "External id": 973666,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936324113.549, "dur": 0.898, + "args": { + "External id": 973667,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936324116.523, "dur": 0.967, + "args": { + "External id": 973668,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936324119.040, "dur": 1.030, + "args": { + "External id": 973669,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936324124.719, "dur": 1.011, + "args": { + "External id": 973670,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936324127.436, "dur": 2.379, + "args": { + "External id": 973671,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936324131.250, "dur": 0.658, + "args": { + "External id": 973672,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936324133.342, "dur": 0.719, + "args": { + "External id": 973673,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936324158.881, "dur": 30617.095, + "args": { + "External id": 973674,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936324179.962, "dur": 30585.892, + "args": { + "External id": 973675,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936324202.576, "dur": 18.992, + "args": { + "External id": 973676,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936324225.376, "dur": 30494.651, + "args": { + "External id": 973677,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936324228.349, "dur": 30489.951, + "args": { + "External id": 973678,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936324233.460, "dur": 6.283, + "args": { + "External id": 973679,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936324241.897, "dur": 30472.754, + "args": { + "External id": 973680,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936355003.490, "dur": 89.464, + "args": { + "External id": 973681,"Sequence number": 10552259, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12725 + } + }, + { + "ph": "s", "id": 207, "pid": 2338706, "tid": 2338706, "ts": 6345936355003.490, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936355038.508, "dur": 46.179, + "args": { + "External id": 973682,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936355046.202, "dur": 37.571, + "args": { + "External id": 973683,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936355175.202, "dur": 85.294, + "args": { + "External id": 973684,"Record function id": 0, "Ev Idx": 12728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936355261.784, "dur": 1324.967, + "args": { + "External id": 973685,"Record function id": 0, "Ev Idx": 12729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936355307.485, "dur": 1262.520, + "args": { + "External id": 973686,"Sequence number": 10552260, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12730 + } + }, + { + "ph": "s", "id": 206, "pid": 2338706, "tid": 2338706, "ts": 6345936355307.485, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936355398.709, "dur": 60.492, + "args": { + "External id": 973687,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936355477.159, "dur": 118.489, + "args": { + "External id": 973688,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936355614.307, "dur": 43.621, + "args": { + "External id": 973689,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936355665.431, "dur": 35.117, + "args": { + "External id": 973690,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936355735.594, "dur": 32.416, + "args": { + "External id": 973691,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936355792.766, "dur": 23.715, + "args": { + "External id": 973692,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936355840.813, "dur": 156.232, + "args": { + "External id": 973693,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936355898.447, "dur": 13.262, + "args": { + "External id": 973694,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936355904.977, "dur": 5.766, + "args": { + "External id": 973695,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936355917.195, "dur": 4.144, + "args": { + "External id": 973696,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936355922.633, "dur": 3.023, + "args": { + "External id": 973697,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936355928.438, "dur": 6.053, + "args": { + "External id": 973698,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936356029.746, "dur": 110.456, + "args": { + "External id": 973699,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936356188.606, "dur": 39.608, + "args": { + "External id": 973700,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936356240.224, "dur": 51.835, + "args": { + "External id": 973701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936356302.112, "dur": 40.571, + "args": { + "External id": 973702,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936356371.316, "dur": 30.274, + "args": { + "External id": 973703,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936356408.457, "dur": 41.363, + "args": { + "External id": 973704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936356470.130, "dur": 23.245, + "args": { + "External id": 973705,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12749 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 2338706, "tid": 2338706, + "ts": 6345936356660.623, "dur": 89.656, + "args": { + "External id": 973706,"Record function id": 0, "Ev Idx": 12750 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936356835.379, "dur": 54.824, + "args": { + "External id": 973707,"Record function id": 0, "Ev Idx": 12751 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 2338706, "tid": 2338706, + "ts": 6345936356901.532, "dur": 31557.902, + "args": { + "External id": 973708,"Record function id": 0, "Ev Idx": 12752 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338706, "tid": 2338706, + "ts": 6345936356914.954, "dur": 1130.065, + "args": { + "External id": 973709,"Record function id": 0, "Ev Idx": 12753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936357033.274, "dur": 12.675, + "args": { + "External id": 973710,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936357107.016, "dur": 42.309, + "args": { + "External id": 973711,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357112.888, "dur": 2.767, + "args": { + "External id": 973712,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357119.427, "dur": 0.478, + "args": { + "External id": 973713,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357123.461, "dur": 0.541, + "args": { + "External id": 973714,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357124.883, "dur": 0.555, + "args": { + "External id": 973715,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357127.729, "dur": 0.536, + "args": { + "External id": 973716,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357131.053, "dur": 0.476, + "args": { + "External id": 973717,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357132.418, "dur": 4.291, + "args": { + "External id": 973718,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357140.018, "dur": 0.430, + "args": { + "External id": 973719,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357141.239, "dur": 0.432, + "args": { + "External id": 973720,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936357162.877, "dur": 69.094, + "args": { + "External id": 973721,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936357274.629, "dur": 159.726, + "args": { + "External id": 973722,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936357289.470, "dur": 6.146, + "args": { + "External id": 973723,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936357301.948, "dur": 13.565, + "args": { + "External id": 973724,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936357309.382, "dur": 5.641, + "args": { + "External id": 973725,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357312.808, "dur": 0.813, + "args": { + "External id": 973726,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936357322.900, "dur": 38.593, + "args": { + "External id": 973727,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357327.833, "dur": 1.761, + "args": { + "External id": 973728,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357330.499, "dur": 0.801, + "args": { + "External id": 973729,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357332.298, "dur": 0.622, + "args": { + "External id": 973730,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357337.562, "dur": 3.185, + "args": { + "External id": 973731,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357344.858, "dur": 0.511, + "args": { + "External id": 973732,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357348.127, "dur": 0.450, + "args": { + "External id": 973733,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357351.010, "dur": 0.648, + "args": { + "External id": 973734,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357352.255, "dur": 0.390, + "args": { + "External id": 973735,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936357355.688, "dur": 1.481, + "args": { + "External id": 973736,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936357384.814, "dur": 39.384, + "args": { + "External id": 973737,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936357501.159, "dur": 412.315, + "args": { + "External id": 973738,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936357540.598, "dur": 367.326, + "args": { + "External id": 973739,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12783, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936357553.833, "dur": 348.191, + "args": { + "External id": 973740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936357940.551, "dur": 2.685, + "args": { + "External id": 973741,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12785, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338706, "tid": 2338706, + "ts": 6345936358112.831, "dur": 30097.054, + "args": { + "External id": 973742,"Record function id": 0, "Ev Idx": 12786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936358236.758, "dur": 7.969, + "args": { + "External id": 973743,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936358248.773, "dur": 1.455, + "args": { + "External id": 973744,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936358252.178, "dur": 3.755, + "args": { + "External id": 973745,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936358257.928, "dur": 1.063, + "args": { + "External id": 973746,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936358260.539, "dur": 1.306, + "args": { + "External id": 973747,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936358263.462, "dur": 1.091, + "args": { + "External id": 973748,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936358268.213, "dur": 1.451, + "args": { + "External id": 973749,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936358271.665, "dur": 2.263, + "args": { + "External id": 973750,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936358275.615, "dur": 0.869, + "args": { + "External id": 973751,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936358278.093, "dur": 0.970, + "args": { + "External id": 973752,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936358303.675, "dur": 29853.687, + "args": { + "External id": 973753,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936358321.953, "dur": 29825.120, + "args": { + "External id": 973754,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936358344.619, "dur": 18.197, + "args": { + "External id": 973755,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936358366.682, "dur": 29736.163, + "args": { + "External id": 973756,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936358369.726, "dur": 29731.928, + "args": { + "External id": 973757,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936358375.365, "dur": 6.474, + "args": { + "External id": 973758,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936358383.767, "dur": 29713.831, + "args": { + "External id": 973759,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936388387.357, "dur": 43.225, + "args": { + "External id": 973760,"Sequence number": 10552261, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12804 + } + }, + { + "ph": "s", "id": 205, "pid": 2338706, "tid": 2338706, "ts": 6345936388387.357, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936388412.414, "dur": 11.646, + "args": { + "External id": 973761,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936388417.221, "dur": 6.590, + "args": { + "External id": 973762,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936388511.498, "dur": 89.400, + "args": { + "External id": 973763,"Record function id": 0, "Ev Idx": 12807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936388602.310, "dur": 1371.358, + "args": { + "External id": 973764,"Record function id": 0, "Ev Idx": 12808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936388650.803, "dur": 1305.529, + "args": { + "External id": 973765,"Sequence number": 10552262, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12809 + } + }, + { + "ph": "s", "id": 204, "pid": 2338706, "tid": 2338706, "ts": 6345936388650.803, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936388739.974, "dur": 58.171, + "args": { + "External id": 973766,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936388816.122, "dur": 120.469, + "args": { + "External id": 973767,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936388954.602, "dur": 47.330, + "args": { + "External id": 973768,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936389034.152, "dur": 85.454, + "args": { + "External id": 973769,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936389160.556, "dur": 37.090, + "args": { + "External id": 973770,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936389220.239, "dur": 21.144, + "args": { + "External id": 973771,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936389268.738, "dur": 161.199, + "args": { + "External id": 973772,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936389328.632, "dur": 17.007, + "args": { + "External id": 973773,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936389337.889, "dur": 6.788, + "args": { + "External id": 973774,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936389349.824, "dur": 5.532, + "args": { + "External id": 973775,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936389356.481, "dur": 1.069, + "args": { + "External id": 973776,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936389360.306, "dur": 6.574, + "args": { + "External id": 973777,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936389443.747, "dur": 68.067, + "args": { + "External id": 973778,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936389555.810, "dur": 38.108, + "args": { + "External id": 973779,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936389604.734, "dur": 53.021, + "args": { + "External id": 973780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936389666.561, "dur": 44.065, + "args": { + "External id": 973781,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936389740.102, "dur": 33.274, + "args": { + "External id": 973782,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936389781.881, "dur": 44.162, + "args": { + "External id": 973783,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936389849.931, "dur": 23.118, + "args": { + "External id": 973784,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12828 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 2338706, "tid": 2338706, + "ts": 6345936390110.944, "dur": 100.097, + "args": { + "External id": 973785,"Record function id": 0, "Ev Idx": 12829 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936390300.862, "dur": 55.252, + "args": { + "External id": 973786,"Record function id": 0, "Ev Idx": 12830 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 2338706, "tid": 2338706, + "ts": 6345936390367.288, "dur": 32445.676, + "args": { + "External id": 973787,"Record function id": 0, "Ev Idx": 12831 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338706, "tid": 2338706, + "ts": 6345936390381.817, "dur": 1183.767, + "args": { + "External id": 973788,"Record function id": 0, "Ev Idx": 12832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936390480.297, "dur": 11.075, + "args": { + "External id": 973789,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936390507.603, "dur": 48.417, + "args": { + "External id": 973790,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390517.095, "dur": 2.722, + "args": { + "External id": 973791,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390526.389, "dur": 0.578, + "args": { + "External id": 973792,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390527.905, "dur": 0.828, + "args": { + "External id": 973793,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390532.450, "dur": 1.732, + "args": { + "External id": 973794,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390534.852, "dur": 0.588, + "args": { + "External id": 973795,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390536.489, "dur": 0.586, + "args": { + "External id": 973796,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390542.717, "dur": 3.500, + "args": { + "External id": 973797,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390547.255, "dur": 0.392, + "args": { + "External id": 973798,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390548.526, "dur": 0.441, + "args": { + "External id": 973799,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936390571.258, "dur": 69.055, + "args": { + "External id": 973800,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936390679.701, "dur": 146.370, + "args": { + "External id": 973801,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936390695.600, "dur": 4.522, + "args": { + "External id": 973802,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936390706.513, "dur": 18.335, + "args": { + "External id": 973803,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936390714.620, "dur": 9.585, + "args": { + "External id": 973804,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390720.735, "dur": 2.171, + "args": { + "External id": 973805,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936390732.391, "dur": 31.602, + "args": { + "External id": 973806,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390733.982, "dur": 0.555, + "args": { + "External id": 973807,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390738.126, "dur": 0.807, + "args": { + "External id": 973808,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390739.773, "dur": 0.422, + "args": { + "External id": 973809,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390742.440, "dur": 3.033, + "args": { + "External id": 973810,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390748.355, "dur": 0.602, + "args": { + "External id": 973811,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390749.754, "dur": 1.951, + "args": { + "External id": 973812,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390752.544, "dur": 0.619, + "args": { + "External id": 973813,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390756.151, "dur": 0.776, + "args": { + "External id": 973814,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936390758.946, "dur": 0.325, + "args": { + "External id": 973815,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936390777.304, "dur": 39.367, + "args": { + "External id": 973816,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936390887.757, "dur": 553.194, + "args": { + "External id": 973817,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936390926.069, "dur": 508.122, + "args": { + "External id": 973818,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12862, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936390937.197, "dur": 489.313, + "args": { + "External id": 973819,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936391472.863, "dur": 3.344, + "args": { + "External id": 973820,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12864, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338706, "tid": 2338706, + "ts": 6345936391589.987, "dur": 30962.619, + "args": { + "External id": 973821,"Record function id": 0, "Ev Idx": 12865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936391711.037, "dur": 8.044, + "args": { + "External id": 973822,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936391722.793, "dur": 1.306, + "args": { + "External id": 973823,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936391726.104, "dur": 3.693, + "args": { + "External id": 973824,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936391799.490, "dur": 1.510, + "args": { + "External id": 973825,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936391804.828, "dur": 0.731, + "args": { + "External id": 973826,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936391807.148, "dur": 1.033, + "args": { + "External id": 973827,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936391812.251, "dur": 0.756, + "args": { + "External id": 973828,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936391816.299, "dur": 2.371, + "args": { + "External id": 973829,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936391820.102, "dur": 0.905, + "args": { + "External id": 973830,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936391822.357, "dur": 0.750, + "args": { + "External id": 973831,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936391847.973, "dur": 30650.180, + "args": { + "External id": 973832,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936391868.598, "dur": 30619.048, + "args": { + "External id": 973833,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936391887.268, "dur": 18.081, + "args": { + "External id": 973834,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936391909.613, "dur": 30530.130, + "args": { + "External id": 973835,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936391912.571, "dur": 30525.484, + "args": { + "External id": 973836,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936391917.984, "dur": 7.217, + "args": { + "External id": 973837,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936391927.075, "dur": 30507.132, + "args": { + "External id": 973838,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936422738.110, "dur": 43.077, + "args": { + "External id": 973839,"Sequence number": 10552263, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12883 + } + }, + { + "ph": "s", "id": 203, "pid": 2338706, "tid": 2338706, "ts": 6345936422738.110, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936422759.949, "dur": 14.506, + "args": { + "External id": 973840,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936422767.609, "dur": 6.579, + "args": { + "External id": 973841,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936422865.280, "dur": 84.425, + "args": { + "External id": 973842,"Record function id": 0, "Ev Idx": 12886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936422951.361, "dur": 1410.213, + "args": { + "External id": 973843,"Record function id": 0, "Ev Idx": 12887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936423000.650, "dur": 1342.434, + "args": { + "External id": 973844,"Sequence number": 10552264, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12888 + } + }, + { + "ph": "s", "id": 202, "pid": 2338706, "tid": 2338706, "ts": 6345936423000.650, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936423148.766, "dur": 65.021, + "args": { + "External id": 973845,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936423233.495, "dur": 118.179, + "args": { + "External id": 973846,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936423368.657, "dur": 42.980, + "args": { + "External id": 973847,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936423421.147, "dur": 35.817, + "args": { + "External id": 973848,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936423487.806, "dur": 31.919, + "args": { + "External id": 973849,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936423544.824, "dur": 21.420, + "args": { + "External id": 973850,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936423591.890, "dur": 157.670, + "args": { + "External id": 973851,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936423650.181, "dur": 14.650, + "args": { + "External id": 973852,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936423657.158, "dur": 6.589, + "args": { + "External id": 973853,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936423668.809, "dur": 5.620, + "args": { + "External id": 973854,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936423677.525, "dur": 1.192, + "args": { + "External id": 973855,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936423681.405, "dur": 5.760, + "args": { + "External id": 973856,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936423764.203, "dur": 56.714, + "args": { + "External id": 973857,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936423861.274, "dur": 37.651, + "args": { + "External id": 973858,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936423909.704, "dur": 50.297, + "args": { + "External id": 973859,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936423969.807, "dur": 62.149, + "args": { + "External id": 973860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936424105.283, "dur": 39.519, + "args": { + "External id": 973861,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936424155.049, "dur": 52.529, + "args": { + "External id": 973862,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936424230.719, "dur": 24.348, + "args": { + "External id": 973863,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12907 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 2338706, "tid": 2338706, + "ts": 6345936424438.360, "dur": 91.664, + "args": { + "External id": 973864,"Record function id": 0, "Ev Idx": 12908 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936424619.184, "dur": 53.290, + "args": { + "External id": 973865,"Record function id": 0, "Ev Idx": 12909 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 2338706, "tid": 2338706, + "ts": 6345936424683.734, "dur": 31080.849, + "args": { + "External id": 973866,"Record function id": 0, "Ev Idx": 12910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338706, "tid": 2338706, + "ts": 6345936424698.268, "dur": 1097.150, + "args": { + "External id": 973867,"Record function id": 0, "Ev Idx": 12911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936424794.720, "dur": 10.950, + "args": { + "External id": 973868,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936424822.180, "dur": 44.363, + "args": { + "External id": 973869,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936424830.195, "dur": 2.491, + "args": { + "External id": 973870,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936424836.166, "dur": 0.668, + "args": { + "External id": 973871,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936424839.749, "dur": 0.585, + "args": { + "External id": 973872,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936424841.630, "dur": 0.432, + "args": { + "External id": 973873,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936424844.397, "dur": 0.432, + "args": { + "External id": 973874,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936424847.817, "dur": 0.614, + "args": { + "External id": 973875,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936424849.252, "dur": 4.398, + "args": { + "External id": 973876,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936424854.691, "dur": 0.362, + "args": { + "External id": 973877,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936424858.580, "dur": 0.440, + "args": { + "External id": 973878,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936424883.548, "dur": 63.038, + "args": { + "External id": 973879,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936424988.788, "dur": 222.502, + "args": { + "External id": 973880,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936425005.845, "dur": 25.659, + "args": { + "External id": 973881,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936425041.022, "dur": 52.720, + "args": { + "External id": 973882,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936425049.186, "dur": 43.582, + "args": { + "External id": 973883,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936425053.328, "dur": 0.987, + "args": { + "External id": 973884,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936425106.478, "dur": 35.428, + "args": { + "External id": 973885,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936425111.553, "dur": 0.557, + "args": { + "External id": 973886,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936425113.318, "dur": 1.887, + "args": { + "External id": 973887,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936425115.939, "dur": 0.798, + "args": { + "External id": 973888,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936425120.350, "dur": 3.102, + "args": { + "External id": 973889,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936425126.038, "dur": 0.456, + "args": { + "External id": 973890,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936425127.306, "dur": 0.584, + "args": { + "External id": 973891,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936425130.440, "dur": 0.637, + "args": { + "External id": 973892,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936425132.842, "dur": 0.679, + "args": { + "External id": 973893,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936425135.987, "dur": 0.653, + "args": { + "External id": 973894,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936425157.096, "dur": 43.974, + "args": { + "External id": 973895,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936425279.663, "dur": 413.160, + "args": { + "External id": 973896,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936425319.343, "dur": 367.286, + "args": { + "External id": 973897,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12941, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936425330.688, "dur": 349.341, + "args": { + "External id": 973898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936425716.113, "dur": 2.631, + "args": { + "External id": 973899,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12943, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338706, "tid": 2338706, + "ts": 6345936425818.800, "dur": 29674.653, + "args": { + "External id": 973900,"Record function id": 0, "Ev Idx": 12944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936425933.895, "dur": 6.555, + "args": { + "External id": 973901,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936425944.466, "dur": 1.285, + "args": { + "External id": 973902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936425947.921, "dur": 3.747, + "args": { + "External id": 973903,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936425953.706, "dur": 1.412, + "args": { + "External id": 973904,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936425956.636, "dur": 0.938, + "args": { + "External id": 973905,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936425959.209, "dur": 1.063, + "args": { + "External id": 973906,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936425965.777, "dur": 1.054, + "args": { + "External id": 973907,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936425968.387, "dur": 2.688, + "args": { + "External id": 973908,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936425972.785, "dur": 1.088, + "args": { + "External id": 973909,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936425975.198, "dur": 1.126, + "args": { + "External id": 973910,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936425999.475, "dur": 29433.670, + "args": { + "External id": 973911,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936426045.761, "dur": 29376.715, + "args": { + "External id": 973912,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936426106.680, "dur": 20.466, + "args": { + "External id": 973913,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936426132.786, "dur": 29245.006, + "args": { + "External id": 973914,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936426135.866, "dur": 29240.176, + "args": { + "External id": 973915,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936426141.568, "dur": 8.306, + "args": { + "External id": 973916,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936426151.662, "dur": 29220.852, + "args": { + "External id": 973917,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936455683.241, "dur": 45.335, + "args": { + "External id": 973918,"Sequence number": 10552265, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12962 + } + }, + { + "ph": "s", "id": 201, "pid": 2338706, "tid": 2338706, "ts": 6345936455683.241, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936455708.016, "dur": 13.926, + "args": { + "External id": 973919,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936455714.518, "dur": 7.114, + "args": { + "External id": 973920,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936455816.840, "dur": 81.410, + "args": { + "External id": 973921,"Record function id": 0, "Ev Idx": 12965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936455900.376, "dur": 1396.358, + "args": { + "External id": 973922,"Record function id": 0, "Ev Idx": 12966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936455948.271, "dur": 1331.698, + "args": { + "External id": 973923,"Sequence number": 10552266, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12967 + } + }, + { + "ph": "s", "id": 200, "pid": 2338706, "tid": 2338706, "ts": 6345936455948.271, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936456086.457, "dur": 69.750, + "args": { + "External id": 973924,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936456178.849, "dur": 117.775, + "args": { + "External id": 973925,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936456315.176, "dur": 49.347, + "args": { + "External id": 973926,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936456373.813, "dur": 35.655, + "args": { + "External id": 973927,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936456445.119, "dur": 35.571, + "args": { + "External id": 973928,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936456503.657, "dur": 21.975, + "args": { + "External id": 973929,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936456550.912, "dur": 156.967, + "args": { + "External id": 973930,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936456608.122, "dur": 14.778, + "args": { + "External id": 973931,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936456615.095, "dur": 6.810, + "args": { + "External id": 973932,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936456628.887, "dur": 5.658, + "args": { + "External id": 973933,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936456635.833, "dur": 1.460, + "args": { + "External id": 973934,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936456639.930, "dur": 7.744, + "args": { + "External id": 973935,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936456722.423, "dur": 57.515, + "args": { + "External id": 973936,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936456819.052, "dur": 36.802, + "args": { + "External id": 973937,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936456869.559, "dur": 50.668, + "args": { + "External id": 973938,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936456928.799, "dur": 40.475, + "args": { + "External id": 973939,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936456994.225, "dur": 50.763, + "args": { + "External id": 973940,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936457092.983, "dur": 53.060, + "args": { + "External id": 973941,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936457173.837, "dur": 23.086, + "args": { + "External id": 973942,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12986 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 2338706, "tid": 2338706, + "ts": 6345936457373.487, "dur": 93.152, + "args": { + "External id": 973943,"Record function id": 0, "Ev Idx": 12987 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936457555.563, "dur": 55.594, + "args": { + "External id": 973944,"Record function id": 0, "Ev Idx": 12988 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 2338706, "tid": 2338706, + "ts": 6345936457621.735, "dur": 30322.002, + "args": { + "External id": 973945,"Record function id": 0, "Ev Idx": 12989 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338706, "tid": 2338706, + "ts": 6345936457633.953, "dur": 1085.375, + "args": { + "External id": 973946,"Record function id": 0, "Ev Idx": 12990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936457727.143, "dur": 10.976, + "args": { + "External id": 973947,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936457756.536, "dur": 41.749, + "args": { + "External id": 973948,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457762.238, "dur": 2.515, + "args": { + "External id": 973949,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457769.109, "dur": 0.716, + "args": { + "External id": 973950,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457772.598, "dur": 0.550, + "args": { + "External id": 973951,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457773.973, "dur": 0.562, + "args": { + "External id": 973952,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457776.742, "dur": 0.460, + "args": { + "External id": 973953,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457779.446, "dur": 0.448, + "args": { + "External id": 973954,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457780.718, "dur": 4.010, + "args": { + "External id": 973955,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457790.209, "dur": 0.390, + "args": { + "External id": 973956,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457791.439, "dur": 0.399, + "args": { + "External id": 973957,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936457810.566, "dur": 62.830, + "args": { + "External id": 973958,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936457913.751, "dur": 214.897, + "args": { + "External id": 973959,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936457929.248, "dur": 4.575, + "args": { + "External id": 973960,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936457939.458, "dur": 16.246, + "args": { + "External id": 973961,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936457947.394, "dur": 7.784, + "args": { + "External id": 973962,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457950.957, "dur": 2.762, + "args": { + "External id": 973963,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936457962.568, "dur": 32.034, + "args": { + "External id": 973964,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457966.121, "dur": 0.527, + "args": { + "External id": 973965,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457967.848, "dur": 0.830, + "args": { + "External id": 973966,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457969.665, "dur": 0.346, + "args": { + "External id": 973967,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457974.811, "dur": 2.882, + "args": { + "External id": 973968,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457978.416, "dur": 0.550, + "args": { + "External id": 973969,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457981.468, "dur": 0.935, + "args": { + "External id": 973970,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457983.152, "dur": 0.341, + "args": { + "External id": 973971,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457984.180, "dur": 0.468, + "args": { + "External id": 973972,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936457989.046, "dur": 0.424, + "args": { + "External id": 973973,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936458007.255, "dur": 108.592, + "args": { + "External id": 973974,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936458199.497, "dur": 413.001, + "args": { + "External id": 973975,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936458240.050, "dur": 366.793, + "args": { + "External id": 973976,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13020, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936458251.981, "dur": 348.756, + "args": { + "External id": 973977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936458638.590, "dur": 2.865, + "args": { + "External id": 973978,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13022, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338706, "tid": 2338706, + "ts": 6345936458744.886, "dur": 28942.875, + "args": { + "External id": 973979,"Record function id": 0, "Ev Idx": 13023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936458862.868, "dur": 7.299, + "args": { + "External id": 973980,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936458874.026, "dur": 1.222, + "args": { + "External id": 973981,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936458877.456, "dur": 3.556, + "args": { + "External id": 973982,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936458882.843, "dur": 1.249, + "args": { + "External id": 973983,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936458885.715, "dur": 1.100, + "args": { + "External id": 973984,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936458888.437, "dur": 1.091, + "args": { + "External id": 973985,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936458893.613, "dur": 0.987, + "args": { + "External id": 973986,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936458896.592, "dur": 2.381, + "args": { + "External id": 973987,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936458900.547, "dur": 0.980, + "args": { + "External id": 973988,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936458903.124, "dur": 0.744, + "args": { + "External id": 973989,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936458927.520, "dur": 28704.386, + "args": { + "External id": 973990,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936458946.542, "dur": 28674.812, + "args": { + "External id": 973991,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936458967.719, "dur": 20.809, + "args": { + "External id": 973992,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936458993.706, "dur": 28584.640, + "args": { + "External id": 973993,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936458996.757, "dur": 28579.814, + "args": { + "External id": 973994,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936459002.386, "dur": 29.932, + "args": { + "External id": 973995,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936459036.816, "dur": 28535.981, + "args": { + "External id": 973996,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936487869.669, "dur": 41.208, + "args": { + "External id": 973997,"Sequence number": 10552267, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13041 + } + }, + { + "ph": "s", "id": 199, "pid": 2338706, "tid": 2338706, "ts": 6345936487869.669, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936487892.853, "dur": 11.174, + "args": { + "External id": 973998,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936487897.512, "dur": 6.222, + "args": { + "External id": 973999,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936487993.586, "dur": 135.185, + "args": { + "External id": 974000,"Record function id": 0, "Ev Idx": 13044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936488132.236, "dur": 1352.546, + "args": { + "External id": 974001,"Record function id": 0, "Ev Idx": 13045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936488185.358, "dur": 1283.600, + "args": { + "External id": 974002,"Sequence number": 10552268, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13046 + } + }, + { + "ph": "s", "id": 198, "pid": 2338706, "tid": 2338706, "ts": 6345936488185.358, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936488279.402, "dur": 63.260, + "args": { + "External id": 974003,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936488361.302, "dur": 119.914, + "args": { + "External id": 974004,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936488498.976, "dur": 42.964, + "args": { + "External id": 974005,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936488551.303, "dur": 36.358, + "args": { + "External id": 974006,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936488618.030, "dur": 33.488, + "args": { + "External id": 974007,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936488675.297, "dur": 23.383, + "args": { + "External id": 974008,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936488724.983, "dur": 154.901, + "args": { + "External id": 974009,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936488780.809, "dur": 16.747, + "args": { + "External id": 974010,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936488789.693, "dur": 7.005, + "args": { + "External id": 974011,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936488801.580, "dur": 5.179, + "args": { + "External id": 974012,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936488808.230, "dur": 1.381, + "args": { + "External id": 974013,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936488812.471, "dur": 4.839, + "args": { + "External id": 974014,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936488893.118, "dur": 56.983, + "args": { + "External id": 974015,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936488988.059, "dur": 52.601, + "args": { + "External id": 974016,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936489094.395, "dur": 62.761, + "args": { + "External id": 974017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936489169.200, "dur": 43.326, + "args": { + "External id": 974018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936489250.576, "dur": 37.148, + "args": { + "External id": 974019,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936489296.576, "dur": 43.730, + "args": { + "External id": 974020,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936489364.045, "dur": 23.999, + "args": { + "External id": 974021,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13065 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 2338706, "tid": 2338706, + "ts": 6345936489560.692, "dur": 88.493, + "args": { + "External id": 974022,"Record function id": 0, "Ev Idx": 13066 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936489735.742, "dur": 54.204, + "args": { + "External id": 974023,"Record function id": 0, "Ev Idx": 13067 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 2338706, "tid": 2338706, + "ts": 6345936489801.031, "dur": 31157.833, + "args": { + "External id": 974024,"Record function id": 0, "Ev Idx": 13068 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338706, "tid": 2338706, + "ts": 6345936489812.135, "dur": 1096.479, + "args": { + "External id": 974025,"Record function id": 0, "Ev Idx": 13069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936489908.928, "dur": 10.336, + "args": { + "External id": 974026,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936489936.073, "dur": 44.891, + "args": { + "External id": 974027,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936489942.180, "dur": 2.630, + "args": { + "External id": 974028,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936489952.499, "dur": 0.445, + "args": { + "External id": 974029,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936489953.987, "dur": 0.534, + "args": { + "External id": 974030,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936489955.880, "dur": 0.784, + "args": { + "External id": 974031,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936489961.806, "dur": 1.017, + "args": { + "External id": 974032,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936489963.551, "dur": 0.456, + "args": { + "External id": 974033,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936489966.794, "dur": 4.265, + "args": { + "External id": 974034,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936489972.119, "dur": 0.518, + "args": { + "External id": 974035,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936489973.419, "dur": 0.512, + "args": { + "External id": 974036,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936489996.880, "dur": 132.760, + "args": { + "External id": 974037,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936490177.467, "dur": 151.168, + "args": { + "External id": 974038,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936490194.150, "dur": 5.976, + "args": { + "External id": 974039,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936490206.901, "dur": 12.233, + "args": { + "External id": 974040,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936490211.973, "dur": 6.663, + "args": { + "External id": 974041,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936490215.862, "dur": 1.004, + "args": { + "External id": 974042,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936490228.851, "dur": 32.010, + "args": { + "External id": 974043,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936490230.993, "dur": 2.020, + "args": { + "External id": 974044,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936490233.940, "dur": 0.476, + "args": { + "External id": 974045,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936490236.759, "dur": 0.609, + "args": { + "External id": 974046,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936490240.094, "dur": 2.886, + "args": { + "External id": 974047,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936490245.455, "dur": 0.396, + "args": { + "External id": 974048,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936490246.828, "dur": 0.338, + "args": { + "External id": 974049,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936490250.246, "dur": 0.334, + "args": { + "External id": 974050,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936490253.243, "dur": 0.387, + "args": { + "External id": 974051,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936490254.483, "dur": 2.085, + "args": { + "External id": 974052,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936490276.822, "dur": 42.239, + "args": { + "External id": 974053,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936490394.595, "dur": 408.737, + "args": { + "External id": 974054,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936490433.048, "dur": 364.648, + "args": { + "External id": 974055,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13099, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936490445.601, "dur": 345.703, + "args": { + "External id": 974056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936490830.221, "dur": 2.853, + "args": { + "External id": 974057,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13101, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338706, "tid": 2338706, + "ts": 6345936490931.190, "dur": 29801.706, + "args": { + "External id": 974058,"Record function id": 0, "Ev Idx": 13102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936491114.157, "dur": 7.904, + "args": { + "External id": 974059,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936491126.977, "dur": 1.287, + "args": { + "External id": 974060,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936491130.237, "dur": 4.167, + "args": { + "External id": 974061,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936491136.466, "dur": 0.830, + "args": { + "External id": 974062,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936491139.436, "dur": 1.336, + "args": { + "External id": 974063,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936491142.282, "dur": 1.204, + "args": { + "External id": 974064,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936491148.031, "dur": 0.831, + "args": { + "External id": 974065,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936491150.578, "dur": 2.356, + "args": { + "External id": 974066,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936491154.559, "dur": 1.002, + "args": { + "External id": 974067,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936491157.148, "dur": 1.073, + "args": { + "External id": 974068,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936491183.917, "dur": 29496.999, + "args": { + "External id": 974069,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936491202.312, "dur": 29468.357, + "args": { + "External id": 974070,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936491222.244, "dur": 18.719, + "args": { + "External id": 974071,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936491245.204, "dur": 29379.350, + "args": { + "External id": 974072,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936491248.465, "dur": 29374.302, + "args": { + "External id": 974073,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936491254.977, "dur": 6.206, + "args": { + "External id": 974074,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936491263.333, "dur": 29356.149, + "args": { + "External id": 974075,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936520893.205, "dur": 37.863, + "args": { + "External id": 974076,"Sequence number": 10552269, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13120 + } + }, + { + "ph": "s", "id": 197, "pid": 2338706, "tid": 2338706, "ts": 6345936520893.205, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936520914.399, "dur": 11.331, + "args": { + "External id": 974077,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936520919.514, "dur": 5.950, + "args": { + "External id": 974078,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936521004.929, "dur": 132.596, + "args": { + "External id": 974079,"Record function id": 0, "Ev Idx": 13123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936521140.759, "dur": 1328.150, + "args": { + "External id": 974080,"Record function id": 0, "Ev Idx": 13124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936521190.549, "dur": 1260.821, + "args": { + "External id": 974081,"Sequence number": 10552270, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13125 + } + }, + { + "ph": "s", "id": 196, "pid": 2338706, "tid": 2338706, "ts": 6345936521190.549, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936521275.865, "dur": 60.839, + "args": { + "External id": 974082,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936521353.967, "dur": 121.572, + "args": { + "External id": 974083,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936521491.772, "dur": 44.748, + "args": { + "External id": 974084,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936521550.451, "dur": 35.933, + "args": { + "External id": 974085,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936521617.092, "dur": 33.526, + "args": { + "External id": 974086,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936521674.830, "dur": 21.954, + "args": { + "External id": 974087,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936521724.689, "dur": 154.189, + "args": { + "External id": 974088,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936521783.170, "dur": 14.276, + "args": { + "External id": 974089,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936521790.411, "dur": 6.040, + "args": { + "External id": 974090,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936521800.804, "dur": 5.549, + "args": { + "External id": 974091,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936521807.911, "dur": 1.315, + "args": { + "External id": 974092,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936521812.271, "dur": 5.641, + "args": { + "External id": 974093,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936521891.947, "dur": 57.270, + "args": { + "External id": 974094,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936521984.770, "dur": 58.368, + "args": { + "External id": 974095,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936522098.584, "dur": 57.955, + "args": { + "External id": 974096,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936522167.280, "dur": 41.917, + "args": { + "External id": 974097,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936522239.358, "dur": 33.981, + "args": { + "External id": 974098,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936522282.165, "dur": 42.849, + "args": { + "External id": 974099,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936522346.219, "dur": 23.987, + "args": { + "External id": 974100,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13144 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 2338706, "tid": 2338706, + "ts": 6345936522545.462, "dur": 91.756, + "args": { + "External id": 974101,"Record function id": 0, "Ev Idx": 13145 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936522725.705, "dur": 56.582, + "args": { + "External id": 974102,"Record function id": 0, "Ev Idx": 13146 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 2338706, "tid": 2338706, + "ts": 6345936522793.139, "dur": 30097.420, + "args": { + "External id": 974103,"Record function id": 0, "Ev Idx": 13147 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338706, "tid": 2338706, + "ts": 6345936522803.437, "dur": 1107.081, + "args": { + "External id": 974104,"Record function id": 0, "Ev Idx": 13148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936522901.390, "dur": 10.404, + "args": { + "External id": 974105,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936522926.478, "dur": 48.688, + "args": { + "External id": 974106,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936522933.050, "dur": 2.368, + "args": { + "External id": 974107,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936522940.577, "dur": 0.387, + "args": { + "External id": 974108,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936522942.807, "dur": 0.587, + "args": { + "External id": 974109,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936522945.368, "dur": 0.626, + "args": { + "External id": 974110,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936522949.853, "dur": 0.551, + "args": { + "External id": 974111,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936522952.339, "dur": 0.800, + "args": { + "External id": 974112,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936522959.561, "dur": 4.487, + "args": { + "External id": 974113,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936522966.066, "dur": 0.493, + "args": { + "External id": 974114,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936522968.108, "dur": 0.364, + "args": { + "External id": 974115,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936522990.825, "dur": 126.295, + "args": { + "External id": 974116,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936523161.390, "dur": 156.075, + "args": { + "External id": 974117,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936523178.494, "dur": 8.417, + "args": { + "External id": 974118,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936523196.446, "dur": 13.535, + "args": { + "External id": 974119,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936523201.644, "dur": 7.840, + "args": { + "External id": 974120,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936523206.590, "dur": 0.872, + "args": { + "External id": 974121,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936523218.581, "dur": 35.832, + "args": { + "External id": 974122,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936523221.475, "dur": 0.611, + "args": { + "External id": 974123,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936523223.857, "dur": 2.447, + "args": { + "External id": 974124,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936523227.985, "dur": 0.518, + "args": { + "External id": 974125,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936523229.754, "dur": 3.092, + "args": { + "External id": 974126,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936523236.737, "dur": 0.335, + "args": { + "External id": 974127,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936523238.965, "dur": 0.677, + "args": { + "External id": 974128,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936523241.575, "dur": 0.645, + "args": { + "External id": 974129,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936523245.924, "dur": 0.602, + "args": { + "External id": 974130,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936523248.762, "dur": 0.583, + "args": { + "External id": 974131,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936523269.269, "dur": 37.428, + "args": { + "External id": 974132,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936523384.836, "dur": 417.140, + "args": { + "External id": 974133,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936523422.716, "dur": 373.218, + "args": { + "External id": 974134,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13178, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936523435.963, "dur": 350.997, + "args": { + "External id": 974135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936523828.074, "dur": 2.920, + "args": { + "External id": 974136,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13180, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338706, "tid": 2338706, + "ts": 6345936523935.300, "dur": 28694.400, + "args": { + "External id": 974137,"Record function id": 0, "Ev Idx": 13181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936524113.232, "dur": 7.676, + "args": { + "External id": 974138,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936524126.198, "dur": 1.122, + "args": { + "External id": 974139,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936524129.579, "dur": 4.406, + "args": { + "External id": 974140,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936524136.394, "dur": 0.978, + "args": { + "External id": 974141,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936524139.465, "dur": 0.899, + "args": { + "External id": 974142,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936524142.132, "dur": 1.145, + "args": { + "External id": 974143,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936524147.871, "dur": 0.712, + "args": { + "External id": 974144,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936524150.538, "dur": 2.381, + "args": { + "External id": 974145,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936524155.118, "dur": 1.081, + "args": { + "External id": 974146,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936524157.967, "dur": 0.927, + "args": { + "External id": 974147,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936524183.546, "dur": 28386.472, + "args": { + "External id": 974148,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936524202.391, "dur": 28357.040, + "args": { + "External id": 974149,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936524224.400, "dur": 19.453, + "args": { + "External id": 974150,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936524248.082, "dur": 28266.783, + "args": { + "External id": 974151,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936524251.405, "dur": 28261.710, + "args": { + "External id": 974152,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936524257.601, "dur": 5.592, + "args": { + "External id": 974153,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936524265.052, "dur": 28244.521, + "args": { + "External id": 974154,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936552821.662, "dur": 38.626, + "args": { + "External id": 974155,"Sequence number": 10552271, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13199 + } + }, + { + "ph": "s", "id": 195, "pid": 2338706, "tid": 2338706, "ts": 6345936552821.662, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936552842.892, "dur": 11.898, + "args": { + "External id": 974156,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936552848.283, "dur": 6.148, + "args": { + "External id": 974157,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936552937.661, "dur": 101.270, + "args": { + "External id": 974158,"Record function id": 0, "Ev Idx": 13202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936553041.986, "dur": 1377.229, + "args": { + "External id": 974159,"Record function id": 0, "Ev Idx": 13203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936553129.447, "dur": 1273.243, + "args": { + "External id": 974160,"Sequence number": 10552272, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13204 + } + }, + { + "ph": "s", "id": 194, "pid": 2338706, "tid": 2338706, "ts": 6345936553129.447, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936553217.732, "dur": 59.867, + "args": { + "External id": 974161,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936553295.867, "dur": 121.694, + "args": { + "External id": 974162,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936553434.252, "dur": 43.442, + "args": { + "External id": 974163,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936553490.649, "dur": 36.407, + "args": { + "External id": 974164,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936553555.441, "dur": 33.788, + "args": { + "External id": 974165,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936553613.666, "dur": 23.869, + "args": { + "External id": 974166,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936553665.094, "dur": 159.954, + "args": { + "External id": 974167,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936553725.301, "dur": 15.317, + "args": { + "External id": 974168,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936553732.702, "dur": 6.821, + "args": { + "External id": 974169,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936553745.215, "dur": 4.220, + "args": { + "External id": 974170,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936553750.909, "dur": 1.124, + "args": { + "External id": 974171,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936553755.282, "dur": 6.003, + "args": { + "External id": 974172,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936553847.554, "dur": 57.397, + "args": { + "External id": 974173,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936553942.091, "dur": 35.586, + "args": { + "External id": 974174,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936553990.431, "dur": 112.126, + "args": { + "External id": 974175,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936554117.296, "dur": 47.914, + "args": { + "External id": 974176,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936554194.103, "dur": 35.193, + "args": { + "External id": 974177,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936554236.957, "dur": 42.500, + "args": { + "External id": 974178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936554301.967, "dur": 22.590, + "args": { + "External id": 974179,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13223 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 2338706, "tid": 2338706, + "ts": 6345936554495.383, "dur": 88.518, + "args": { + "External id": 974180,"Record function id": 0, "Ev Idx": 13224 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936554669.706, "dur": 57.632, + "args": { + "External id": 974181,"Record function id": 0, "Ev Idx": 13225 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 2338706, "tid": 2338706, + "ts": 6345936554738.316, "dur": 31552.698, + "args": { + "External id": 974182,"Record function id": 0, "Ev Idx": 13226 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338706, "tid": 2338706, + "ts": 6345936554749.471, "dur": 1095.037, + "args": { + "External id": 974183,"Record function id": 0, "Ev Idx": 13227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936554843.358, "dur": 10.161, + "args": { + "External id": 974184,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936554869.798, "dur": 46.324, + "args": { + "External id": 974185,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936554876.630, "dur": 3.037, + "args": { + "External id": 974186,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936554885.341, "dur": 0.451, + "args": { + "External id": 974187,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936554887.911, "dur": 0.562, + "args": { + "External id": 974188,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936554890.739, "dur": 0.597, + "args": { + "External id": 974189,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936554894.946, "dur": 0.576, + "args": { + "External id": 974190,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936554897.278, "dur": 0.892, + "args": { + "External id": 974191,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936554900.326, "dur": 4.532, + "args": { + "External id": 974192,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936554906.811, "dur": 0.464, + "args": { + "External id": 974193,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936554909.022, "dur": 0.450, + "args": { + "External id": 974194,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936554929.704, "dur": 56.406, + "args": { + "External id": 974195,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936555089.078, "dur": 171.287, + "args": { + "External id": 974196,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936555106.939, "dur": 7.252, + "args": { + "External id": 974197,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936555121.235, "dur": 13.428, + "args": { + "External id": 974198,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936555126.648, "dur": 7.551, + "args": { + "External id": 974199,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936555131.457, "dur": 0.941, + "args": { + "External id": 974200,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936555143.479, "dur": 37.407, + "args": { + "External id": 974201,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936555146.819, "dur": 3.003, + "args": { + "External id": 974202,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936555151.729, "dur": 0.609, + "args": { + "External id": 974203,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936555153.970, "dur": 0.542, + "args": { + "External id": 974204,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936555158.786, "dur": 2.969, + "args": { + "External id": 974205,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936555163.126, "dur": 0.603, + "args": { + "External id": 974206,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936555165.307, "dur": 0.419, + "args": { + "External id": 974207,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936555168.977, "dur": 0.604, + "args": { + "External id": 974208,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936555170.954, "dur": 0.589, + "args": { + "External id": 974209,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936555173.752, "dur": 2.280, + "args": { + "External id": 974210,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936555206.236, "dur": 43.286, + "args": { + "External id": 974211,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936555327.686, "dur": 406.723, + "args": { + "External id": 974212,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936555365.358, "dur": 362.960, + "args": { + "External id": 974213,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13257, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936555378.908, "dur": 343.356, + "args": { + "External id": 974214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936555760.549, "dur": 3.109, + "args": { + "External id": 974215,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13259, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338706, "tid": 2338706, + "ts": 6345936555868.191, "dur": 30131.548, + "args": { + "External id": 974216,"Record function id": 0, "Ev Idx": 13260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936555985.842, "dur": 6.226, + "args": { + "External id": 974217,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936555996.038, "dur": 1.112, + "args": { + "External id": 974218,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936555999.629, "dur": 3.529, + "args": { + "External id": 974219,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936556005.415, "dur": 0.862, + "args": { + "External id": 974220,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936556029.906, "dur": 2.732, + "args": { + "External id": 974221,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936556038.950, "dur": 1.224, + "args": { + "External id": 974222,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936556042.469, "dur": 0.853, + "args": { + "External id": 974223,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936556045.362, "dur": 2.344, + "args": { + "External id": 974224,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936556049.565, "dur": 0.857, + "args": { + "External id": 974225,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936556054.334, "dur": 0.946, + "args": { + "External id": 974226,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936556117.687, "dur": 29825.349, + "args": { + "External id": 974227,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936556138.554, "dur": 29794.490, + "args": { + "External id": 974228,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936556159.738, "dur": 18.292, + "args": { + "External id": 974229,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936556182.337, "dur": 29706.827, + "args": { + "External id": 974230,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936556185.406, "dur": 29702.094, + "args": { + "External id": 974231,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936556192.669, "dur": 6.845, + "args": { + "External id": 974232,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936556201.473, "dur": 29682.634, + "args": { + "External id": 974233,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936586219.614, "dur": 37.752, + "args": { + "External id": 974234,"Sequence number": 10552273, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13278 + } + }, + { + "ph": "s", "id": 193, "pid": 2338706, "tid": 2338706, "ts": 6345936586219.614, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936586241.544, "dur": 10.124, + "args": { + "External id": 974235,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936586245.841, "dur": 5.421, + "args": { + "External id": 974236,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936586337.900, "dur": 84.350, + "args": { + "External id": 974237,"Record function id": 0, "Ev Idx": 13281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936586423.915, "dur": 1312.549, + "args": { + "External id": 974238,"Record function id": 0, "Ev Idx": 13282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936586472.051, "dur": 1248.052, + "args": { + "External id": 974239,"Sequence number": 10552274, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13283 + } + }, + { + "ph": "s", "id": 192, "pid": 2338706, "tid": 2338706, "ts": 6345936586472.051, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936586554.326, "dur": 58.952, + "args": { + "External id": 974240,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936586631.372, "dur": 117.280, + "args": { + "External id": 974241,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936586763.433, "dur": 42.078, + "args": { + "External id": 974242,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936586818.329, "dur": 33.750, + "args": { + "External id": 974243,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936586882.025, "dur": 31.035, + "args": { + "External id": 974244,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936586937.910, "dur": 20.166, + "args": { + "External id": 974245,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936586983.337, "dur": 225.987, + "args": { + "External id": 974246,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936587110.664, "dur": 16.131, + "args": { + "External id": 974247,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936587117.590, "dur": 7.977, + "args": { + "External id": 974248,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936587130.164, "dur": 4.752, + "args": { + "External id": 974249,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936587136.463, "dur": 1.272, + "args": { + "External id": 974250,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936587140.710, "dur": 4.952, + "args": { + "External id": 974251,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936587222.881, "dur": 64.662, + "args": { + "External id": 974252,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936587328.361, "dur": 38.549, + "args": { + "External id": 974253,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936587378.935, "dur": 49.876, + "args": { + "External id": 974254,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936587439.350, "dur": 41.268, + "args": { + "External id": 974255,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936587507.244, "dur": 30.496, + "args": { + "External id": 974256,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936587546.567, "dur": 41.978, + "args": { + "External id": 974257,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936587610.362, "dur": 23.617, + "args": { + "External id": 974258,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13302 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 2338706, "tid": 2338706, + "ts": 6345936587810.777, "dur": 89.896, + "args": { + "External id": 974259,"Record function id": 0, "Ev Idx": 13303 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936587989.092, "dur": 119.629, + "args": { + "External id": 974260,"Record function id": 0, "Ev Idx": 13304 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 2338706, "tid": 2338706, + "ts": 6345936588122.428, "dur": 31117.047, + "args": { + "External id": 974261,"Record function id": 0, "Ev Idx": 13305 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338706, "tid": 2338706, + "ts": 6345936588134.809, "dur": 1142.672, + "args": { + "External id": 974262,"Record function id": 0, "Ev Idx": 13306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936588230.100, "dur": 11.600, + "args": { + "External id": 974263,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936588258.256, "dur": 43.138, + "args": { + "External id": 974264,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588264.738, "dur": 2.569, + "args": { + "External id": 974265,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588272.215, "dur": 0.520, + "args": { + "External id": 974266,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588274.374, "dur": 0.651, + "args": { + "External id": 974267,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588276.920, "dur": 0.644, + "args": { + "External id": 974268,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588280.292, "dur": 0.483, + "args": { + "External id": 974269,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588282.961, "dur": 0.469, + "args": { + "External id": 974270,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588285.404, "dur": 4.741, + "args": { + "External id": 974271,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588291.696, "dur": 0.525, + "args": { + "External id": 974272,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588293.694, "dur": 0.359, + "args": { + "External id": 974273,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936588316.314, "dur": 63.466, + "args": { + "External id": 974274,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936588419.368, "dur": 147.811, + "args": { + "External id": 974275,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936588431.821, "dur": 4.514, + "args": { + "External id": 974276,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936588445.070, "dur": 15.314, + "args": { + "External id": 974277,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936588453.539, "dur": 6.341, + "args": { + "External id": 974278,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588457.542, "dur": 0.853, + "args": { + "External id": 974279,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936588468.307, "dur": 35.905, + "args": { + "External id": 974280,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588470.860, "dur": 2.928, + "args": { + "External id": 974281,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588475.575, "dur": 0.502, + "args": { + "External id": 974282,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588477.827, "dur": 0.504, + "args": { + "External id": 974283,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588482.267, "dur": 2.738, + "args": { + "External id": 974284,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588486.362, "dur": 0.585, + "args": { + "External id": 974285,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588488.501, "dur": 0.515, + "args": { + "External id": 974286,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588491.941, "dur": 0.513, + "args": { + "External id": 974287,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588494.011, "dur": 0.596, + "args": { + "External id": 974288,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936588496.422, "dur": 2.258, + "args": { + "External id": 974289,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936588517.793, "dur": 38.982, + "args": { + "External id": 974290,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936588629.415, "dur": 524.815, + "args": { + "External id": 974291,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936588666.175, "dur": 481.155, + "args": { + "External id": 974292,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13336, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936588677.431, "dur": 462.870, + "args": { + "External id": 974293,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936589186.468, "dur": 3.172, + "args": { + "External id": 974294,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13338, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338706, "tid": 2338706, + "ts": 6345936589302.686, "dur": 29645.002, + "args": { + "External id": 974295,"Record function id": 0, "Ev Idx": 13339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936589425.284, "dur": 8.174, + "args": { + "External id": 974296,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936589437.698, "dur": 1.225, + "args": { + "External id": 974297,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936589441.082, "dur": 3.754, + "args": { + "External id": 974298,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936589446.945, "dur": 1.299, + "args": { + "External id": 974299,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936589450.320, "dur": 0.881, + "args": { + "External id": 974300,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936589452.956, "dur": 1.105, + "args": { + "External id": 974301,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936589458.332, "dur": 0.977, + "args": { + "External id": 974302,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936589461.176, "dur": 2.809, + "args": { + "External id": 974303,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936589466.036, "dur": 1.173, + "args": { + "External id": 974304,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936589468.891, "dur": 0.890, + "args": { + "External id": 974305,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936589492.696, "dur": 29401.721, + "args": { + "External id": 974306,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936589511.129, "dur": 29372.675, + "args": { + "External id": 974307,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936589531.054, "dur": 20.157, + "args": { + "External id": 974308,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936589555.291, "dur": 29284.464, + "args": { + "External id": 974309,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936589558.468, "dur": 29280.119, + "args": { + "External id": 974310,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936589565.330, "dur": 5.850, + "args": { + "External id": 974311,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936589573.093, "dur": 29260.978, + "args": { + "External id": 974312,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936619172.382, "dur": 37.026, + "args": { + "External id": 974313,"Sequence number": 10552275, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13357 + } + }, + { + "ph": "s", "id": 191, "pid": 2338706, "tid": 2338706, "ts": 6345936619172.382, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936619191.736, "dur": 12.396, + "args": { + "External id": 974314,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936619196.943, "dur": 6.759, + "args": { + "External id": 974315,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936619287.645, "dur": 85.942, + "args": { + "External id": 974316,"Record function id": 0, "Ev Idx": 13360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936619375.646, "dur": 1406.722, + "args": { + "External id": 974317,"Record function id": 0, "Ev Idx": 13361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936619422.023, "dur": 1342.942, + "args": { + "External id": 974318,"Sequence number": 10552276, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13362 + } + }, + { + "ph": "s", "id": 190, "pid": 2338706, "tid": 2338706, "ts": 6345936619422.023, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936619504.227, "dur": 58.835, + "args": { + "External id": 974319,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936619577.843, "dur": 117.340, + "args": { + "External id": 974320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936619710.325, "dur": 44.635, + "args": { + "External id": 974321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936619767.652, "dur": 36.031, + "args": { + "External id": 974322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936619843.627, "dur": 33.838, + "args": { + "External id": 974323,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936619899.050, "dur": 22.743, + "args": { + "External id": 974324,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936619946.502, "dur": 226.697, + "args": { + "External id": 974325,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936620004.371, "dur": 36.326, + "args": { + "External id": 974326,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936620031.143, "dur": 8.309, + "args": { + "External id": 974327,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936620045.311, "dur": 4.735, + "args": { + "External id": 974328,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936620087.552, "dur": 4.039, + "args": { + "External id": 974329,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936620096.536, "dur": 7.254, + "args": { + "External id": 974330,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936620187.238, "dur": 69.982, + "args": { + "External id": 974331,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936620297.301, "dur": 38.713, + "args": { + "External id": 974332,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936620348.184, "dur": 50.695, + "args": { + "External id": 974333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936620408.522, "dur": 40.546, + "args": { + "External id": 974334,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936620474.584, "dur": 34.162, + "args": { + "External id": 974335,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936620517.578, "dur": 42.169, + "args": { + "External id": 974336,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936620648.349, "dur": 27.198, + "args": { + "External id": 974337,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13381 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 2338706, "tid": 2338706, + "ts": 6345936620857.144, "dur": 94.432, + "args": { + "External id": 974338,"Record function id": 0, "Ev Idx": 13382 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936621103.614, "dur": 58.770, + "args": { + "External id": 974339,"Record function id": 0, "Ev Idx": 13383 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 2338706, "tid": 2338706, + "ts": 6345936621174.232, "dur": 30042.849, + "args": { + "External id": 974340,"Record function id": 0, "Ev Idx": 13384 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338706, "tid": 2338706, + "ts": 6345936621183.230, "dur": 1107.388, + "args": { + "External id": 974341,"Record function id": 0, "Ev Idx": 13385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936621280.702, "dur": 12.145, + "args": { + "External id": 974342,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936621309.606, "dur": 41.911, + "args": { + "External id": 974343,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621315.792, "dur": 2.445, + "args": { + "External id": 974344,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621322.676, "dur": 0.472, + "args": { + "External id": 974345,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621324.524, "dur": 0.602, + "args": { + "External id": 974346,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621326.618, "dur": 0.529, + "args": { + "External id": 974347,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621330.508, "dur": 0.509, + "args": { + "External id": 974348,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621332.696, "dur": 0.689, + "args": { + "External id": 974349,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621334.925, "dur": 4.683, + "args": { + "External id": 974350,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621341.450, "dur": 0.272, + "args": { + "External id": 974351,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621343.555, "dur": 0.454, + "args": { + "External id": 974352,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936621364.862, "dur": 66.088, + "args": { + "External id": 974353,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936621473.448, "dur": 137.817, + "args": { + "External id": 974354,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936621486.221, "dur": 5.094, + "args": { + "External id": 974355,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936621497.243, "dur": 12.440, + "args": { + "External id": 974356,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936621502.298, "dur": 6.888, + "args": { + "External id": 974357,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621507.242, "dur": 0.668, + "args": { + "External id": 974358,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936621516.950, "dur": 34.186, + "args": { + "External id": 974359,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621519.518, "dur": 2.478, + "args": { + "External id": 974360,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621523.538, "dur": 0.545, + "args": { + "External id": 974361,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621525.576, "dur": 0.662, + "args": { + "External id": 974362,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621530.267, "dur": 2.661, + "args": { + "External id": 974363,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621534.536, "dur": 0.286, + "args": { + "External id": 974364,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621536.528, "dur": 0.510, + "args": { + "External id": 974365,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621540.257, "dur": 0.421, + "args": { + "External id": 974366,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621542.527, "dur": 0.521, + "args": { + "External id": 974367,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936621544.464, "dur": 2.298, + "args": { + "External id": 974368,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936621564.812, "dur": 36.831, + "args": { + "External id": 974369,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936621672.344, "dur": 497.427, + "args": { + "External id": 974370,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936621709.759, "dur": 452.592, + "args": { + "External id": 974371,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13415, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936621722.159, "dur": 432.274, + "args": { + "External id": 974372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936622200.098, "dur": 2.950, + "args": { + "External id": 974373,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13417, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338706, "tid": 2338706, + "ts": 6345936622316.041, "dur": 28603.806, + "args": { + "External id": 974374,"Record function id": 0, "Ev Idx": 13418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936622434.778, "dur": 7.677, + "args": { + "External id": 974375,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936622446.474, "dur": 1.458, + "args": { + "External id": 974376,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936622449.890, "dur": 3.610, + "args": { + "External id": 974377,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936622455.912, "dur": 1.008, + "args": { + "External id": 974378,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936622458.836, "dur": 0.881, + "args": { + "External id": 974379,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936622461.645, "dur": 0.860, + "args": { + "External id": 974380,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936622465.935, "dur": 0.792, + "args": { + "External id": 974381,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936622468.371, "dur": 2.514, + "args": { + "External id": 974382,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936622472.405, "dur": 0.799, + "args": { + "External id": 974383,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936622474.972, "dur": 0.926, + "args": { + "External id": 974384,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936622498.676, "dur": 28368.041, + "args": { + "External id": 974385,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936622518.184, "dur": 28337.382, + "args": { + "External id": 974386,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936622541.421, "dur": 20.158, + "args": { + "External id": 974387,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936622565.617, "dur": 28245.621, + "args": { + "External id": 974388,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936622568.464, "dur": 28240.999, + "args": { + "External id": 974389,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936622574.447, "dur": 5.750, + "args": { + "External id": 974390,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936622581.979, "dur": 28223.835, + "args": { + "External id": 974391,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936651144.379, "dur": 39.658, + "args": { + "External id": 974392,"Sequence number": 10552277, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13436 + } + }, + { + "ph": "s", "id": 189, "pid": 2338706, "tid": 2338706, "ts": 6345936651144.379, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936651164.786, "dur": 12.427, + "args": { + "External id": 974393,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936651170.196, "dur": 6.578, + "args": { + "External id": 974394,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936651271.741, "dur": 85.428, + "args": { + "External id": 974395,"Record function id": 0, "Ev Idx": 13439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936651358.714, "dur": 1334.186, + "args": { + "External id": 974396,"Record function id": 0, "Ev Idx": 13440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936651404.371, "dur": 1270.767, + "args": { + "External id": 974397,"Sequence number": 10552278, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13441 + } + }, + { + "ph": "s", "id": 188, "pid": 2338706, "tid": 2338706, "ts": 6345936651404.371, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936651486.002, "dur": 60.436, + "args": { + "External id": 974398,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936651563.907, "dur": 120.121, + "args": { + "External id": 974399,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936651700.190, "dur": 44.315, + "args": { + "External id": 974400,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936651757.301, "dur": 35.511, + "args": { + "External id": 974401,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936651823.185, "dur": 34.731, + "args": { + "External id": 974402,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936651883.032, "dur": 21.303, + "args": { + "External id": 974403,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936651930.379, "dur": 224.424, + "args": { + "External id": 974404,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936651985.576, "dur": 15.792, + "args": { + "External id": 974405,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936651993.010, "dur": 7.121, + "args": { + "External id": 974406,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936652005.777, "dur": 25.458, + "args": { + "External id": 974407,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936652035.226, "dur": 1.318, + "args": { + "External id": 974408,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936652039.676, "dur": 5.605, + "args": { + "External id": 974409,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936652173.132, "dur": 67.969, + "args": { + "External id": 974410,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936652284.830, "dur": 35.175, + "args": { + "External id": 974411,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936652333.726, "dur": 50.697, + "args": { + "External id": 974412,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936652394.476, "dur": 41.835, + "args": { + "External id": 974413,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936652462.430, "dur": 34.951, + "args": { + "External id": 974414,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936652505.083, "dur": 42.296, + "args": { + "External id": 974415,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936652567.197, "dur": 24.075, + "args": { + "External id": 974416,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13460 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 2338706, "tid": 2338706, + "ts": 6345936652766.200, "dur": 92.446, + "args": { + "External id": 974417,"Record function id": 0, "Ev Idx": 13461 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936652951.205, "dur": 54.813, + "args": { + "External id": 974418,"Record function id": 0, "Ev Idx": 13462 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 2338706, "tid": 2338706, + "ts": 6345936653041.102, "dur": 32422.100, + "args": { + "External id": 974419,"Record function id": 0, "Ev Idx": 13463 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338706, "tid": 2338706, + "ts": 6345936653091.272, "dur": 1092.110, + "args": { + "External id": 974420,"Record function id": 0, "Ev Idx": 13464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936653196.164, "dur": 11.629, + "args": { + "External id": 974421,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936653225.232, "dur": 39.339, + "args": { + "External id": 974422,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653231.512, "dur": 2.613, + "args": { + "External id": 974423,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653237.874, "dur": 0.448, + "args": { + "External id": 974424,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653239.323, "dur": 0.685, + "args": { + "External id": 974425,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653241.112, "dur": 0.653, + "args": { + "External id": 974426,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653244.487, "dur": 0.391, + "args": { + "External id": 974427,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653246.379, "dur": 0.620, + "args": { + "External id": 974428,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653248.317, "dur": 3.984, + "args": { + "External id": 974429,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653253.847, "dur": 0.673, + "args": { + "External id": 974430,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653256.270, "dur": 0.362, + "args": { + "External id": 974431,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936653277.252, "dur": 66.835, + "args": { + "External id": 974432,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936653384.486, "dur": 138.387, + "args": { + "External id": 974433,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936653397.658, "dur": 4.766, + "args": { + "External id": 974434,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936653408.955, "dur": 11.462, + "args": { + "External id": 974435,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936653413.927, "dur": 5.925, + "args": { + "External id": 974436,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653417.941, "dur": 0.597, + "args": { + "External id": 974437,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936653428.426, "dur": 33.363, + "args": { + "External id": 974438,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653431.455, "dur": 1.984, + "args": { + "External id": 974439,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653434.591, "dur": 0.555, + "args": { + "External id": 974440,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653436.216, "dur": 0.513, + "args": { + "External id": 974441,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653443.313, "dur": 2.618, + "args": { + "External id": 974442,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653446.960, "dur": 0.676, + "args": { + "External id": 974443,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653449.041, "dur": 0.353, + "args": { + "External id": 974444,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653451.809, "dur": 0.585, + "args": { + "External id": 974445,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653453.397, "dur": 0.596, + "args": { + "External id": 974446,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936653455.270, "dur": 1.316, + "args": { + "External id": 974447,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936653474.361, "dur": 38.310, + "args": { + "External id": 974448,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936653587.488, "dur": 410.264, + "args": { + "External id": 974449,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936653623.782, "dur": 368.848, + "args": { + "External id": 974450,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13494, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936653635.909, "dur": 350.566, + "args": { + "External id": 974451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936654050.969, "dur": 41.084, + "args": { + "External id": 974452,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13496, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338706, "tid": 2338706, + "ts": 6345936654208.311, "dur": 31005.854, + "args": { + "External id": 974453,"Record function id": 0, "Ev Idx": 13497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936654330.656, "dur": 7.786, + "args": { + "External id": 974454,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936654342.525, "dur": 1.321, + "args": { + "External id": 974455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936654345.886, "dur": 3.965, + "args": { + "External id": 974456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936654351.831, "dur": 1.140, + "args": { + "External id": 974457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936654354.525, "dur": 1.051, + "args": { + "External id": 974458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936654357.138, "dur": 0.912, + "args": { + "External id": 974459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936654359.666, "dur": 1.200, + "args": { + "External id": 974460,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936654362.702, "dur": 2.277, + "args": { + "External id": 974461,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936654366.631, "dur": 1.158, + "args": { + "External id": 974462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936654371.989, "dur": 0.918, + "args": { + "External id": 974463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936654392.939, "dur": 30764.960, + "args": { + "External id": 974464,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936654412.440, "dur": 30735.380, + "args": { + "External id": 974465,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936654433.357, "dur": 19.800, + "args": { + "External id": 974466,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936654457.136, "dur": 30644.953, + "args": { + "External id": 974467,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936654460.065, "dur": 30640.798, + "args": { + "External id": 974468,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936654465.969, "dur": 5.794, + "args": { + "External id": 974469,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936654474.139, "dur": 30622.483, + "args": { + "External id": 974470,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936685397.620, "dur": 35.431, + "args": { + "External id": 974471,"Sequence number": 10552279, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13515 + } + }, + { + "ph": "s", "id": 187, "pid": 2338706, "tid": 2338706, "ts": 6345936685397.620, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936685416.671, "dur": 10.465, + "args": { + "External id": 974472,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936685421.652, "dur": 5.157, + "args": { + "External id": 974473,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936685509.131, "dur": 84.407, + "args": { + "External id": 974474,"Record function id": 0, "Ev Idx": 13518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936685595.238, "dur": 1323.778, + "args": { + "External id": 974475,"Record function id": 0, "Ev Idx": 13519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936685638.964, "dur": 1263.386, + "args": { + "External id": 974476,"Sequence number": 10552280, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13520 + } + }, + { + "ph": "s", "id": 186, "pid": 2338706, "tid": 2338706, "ts": 6345936685638.964, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936685732.547, "dur": 56.050, + "args": { + "External id": 974477,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936685804.832, "dur": 117.822, + "args": { + "External id": 974478,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936685938.150, "dur": 44.336, + "args": { + "External id": 974479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936685990.910, "dur": 58.929, + "args": { + "External id": 974480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936686135.482, "dur": 36.511, + "args": { + "External id": 974481,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936686196.788, "dur": 23.874, + "args": { + "External id": 974482,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936686247.047, "dur": 159.980, + "args": { + "External id": 974483,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936686307.093, "dur": 15.941, + "args": { + "External id": 974484,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936686314.620, "dur": 7.477, + "args": { + "External id": 974485,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936686327.328, "dur": 5.139, + "args": { + "External id": 974486,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936686333.890, "dur": 2.886, + "args": { + "External id": 974487,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936686339.600, "dur": 5.614, + "args": { + "External id": 974488,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936686422.019, "dur": 65.518, + "args": { + "External id": 974489,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936686523.535, "dur": 38.115, + "args": { + "External id": 974490,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936686573.752, "dur": 49.402, + "args": { + "External id": 974491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936686632.707, "dur": 40.505, + "args": { + "External id": 974492,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936686698.308, "dur": 33.953, + "args": { + "External id": 974493,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936686738.324, "dur": 40.927, + "args": { + "External id": 974494,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936686800.102, "dur": 24.353, + "args": { + "External id": 974495,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13539 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 2338706, "tid": 2338706, + "ts": 6345936686989.920, "dur": 157.860, + "args": { + "External id": 974496,"Record function id": 0, "Ev Idx": 13540 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936687244.879, "dur": 56.680, + "args": { + "External id": 974497,"Record function id": 0, "Ev Idx": 13541 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 2338706, "tid": 2338706, + "ts": 6345936687312.877, "dur": 31410.528, + "args": { + "External id": 974498,"Record function id": 0, "Ev Idx": 13542 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338706, "tid": 2338706, + "ts": 6345936687323.084, "dur": 1075.098, + "args": { + "External id": 974499,"Record function id": 0, "Ev Idx": 13543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936687417.580, "dur": 10.599, + "args": { + "External id": 974500,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936687445.211, "dur": 38.477, + "args": { + "External id": 974501,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687451.580, "dur": 2.455, + "args": { + "External id": 974502,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687458.606, "dur": 0.464, + "args": { + "External id": 974503,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687460.511, "dur": 0.477, + "args": { + "External id": 974504,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687461.915, "dur": 0.623, + "args": { + "External id": 974505,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687465.105, "dur": 0.498, + "args": { + "External id": 974506,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687466.669, "dur": 0.597, + "args": { + "External id": 974507,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687468.347, "dur": 3.950, + "args": { + "External id": 974508,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687473.781, "dur": 0.465, + "args": { + "External id": 974509,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687475.573, "dur": 0.462, + "args": { + "External id": 974510,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936687497.059, "dur": 62.428, + "args": { + "External id": 974511,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936687599.019, "dur": 133.319, + "args": { + "External id": 974512,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936687611.070, "dur": 4.545, + "args": { + "External id": 974513,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936687621.862, "dur": 11.565, + "args": { + "External id": 974514,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936687627.152, "dur": 5.772, + "args": { + "External id": 974515,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687630.959, "dur": 0.638, + "args": { + "External id": 974516,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936687641.257, "dur": 29.128, + "args": { + "External id": 974517,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687643.499, "dur": 1.894, + "args": { + "External id": 974518,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687646.701, "dur": 0.449, + "args": { + "External id": 974519,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687648.286, "dur": 0.369, + "args": { + "External id": 974520,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687651.624, "dur": 2.855, + "args": { + "External id": 974521,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687655.862, "dur": 0.557, + "args": { + "External id": 974522,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687657.428, "dur": 0.288, + "args": { + "External id": 974523,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687660.393, "dur": 0.464, + "args": { + "External id": 974524,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687661.930, "dur": 0.314, + "args": { + "External id": 974525,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936687663.577, "dur": 2.080, + "args": { + "External id": 974526,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936687684.049, "dur": 38.562, + "args": { + "External id": 974527,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936687792.857, "dur": 485.731, + "args": { + "External id": 974528,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936687830.253, "dur": 441.953, + "args": { + "External id": 974529,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13573, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936687841.806, "dur": 423.541, + "args": { + "External id": 974530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936688307.618, "dur": 2.996, + "args": { + "External id": 974531,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13575, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338706, "tid": 2338706, + "ts": 6345936688423.105, "dur": 30066.689, + "args": { + "External id": 974532,"Record function id": 0, "Ev Idx": 13576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936688543.679, "dur": 7.694, + "args": { + "External id": 974533,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936688555.306, "dur": 1.327, + "args": { + "External id": 974534,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936688558.692, "dur": 3.449, + "args": { + "External id": 974535,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936688564.616, "dur": 1.071, + "args": { + "External id": 974536,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936688567.468, "dur": 0.825, + "args": { + "External id": 974537,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936688570.033, "dur": 1.001, + "args": { + "External id": 974538,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936688575.051, "dur": 0.749, + "args": { + "External id": 974539,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936688577.378, "dur": 2.631, + "args": { + "External id": 974540,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936688581.807, "dur": 0.553, + "args": { + "External id": 974541,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936688583.987, "dur": 0.826, + "args": { + "External id": 974542,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936688608.033, "dur": 29827.287, + "args": { + "External id": 974543,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936688626.624, "dur": 29799.087, + "args": { + "External id": 974544,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936688649.596, "dur": 21.238, + "args": { + "External id": 974545,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936688674.859, "dur": 29706.831, + "args": { + "External id": 974546,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936688681.567, "dur": 29698.377, + "args": { + "External id": 974547,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936688687.237, "dur": 6.622, + "args": { + "External id": 974548,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936688695.697, "dur": 29680.513, + "args": { + "External id": 974549,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936718657.726, "dur": 37.211, + "args": { + "External id": 974550,"Sequence number": 10552281, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13594 + } + }, + { + "ph": "s", "id": 185, "pid": 2338706, "tid": 2338706, "ts": 6345936718657.726, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936718678.529, "dur": 10.352, + "args": { + "External id": 974551,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936718683.584, "dur": 5.021, + "args": { + "External id": 974552,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936718767.790, "dur": 82.769, + "args": { + "External id": 974553,"Record function id": 0, "Ev Idx": 13597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936718852.366, "dur": 1330.822, + "args": { + "External id": 974554,"Record function id": 0, "Ev Idx": 13598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936718898.423, "dur": 1266.858, + "args": { + "External id": 974555,"Sequence number": 10552282, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13599 + } + }, + { + "ph": "s", "id": 184, "pid": 2338706, "tid": 2338706, "ts": 6345936718898.423, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936718980.659, "dur": 107.200, + "args": { + "External id": 974556,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936719110.467, "dur": 115.207, + "args": { + "External id": 974557,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936719241.145, "dur": 44.002, + "args": { + "External id": 974558,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936719294.671, "dur": 36.336, + "args": { + "External id": 974559,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936719367.081, "dur": 35.132, + "args": { + "External id": 974560,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936719426.125, "dur": 21.577, + "args": { + "External id": 974561,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936719473.009, "dur": 149.144, + "args": { + "External id": 974562,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936719529.515, "dur": 12.853, + "args": { + "External id": 974563,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936719535.503, "dur": 6.025, + "args": { + "External id": 974564,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936719545.392, "dur": 4.471, + "args": { + "External id": 974565,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936719551.263, "dur": 1.345, + "args": { + "External id": 974566,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936719555.576, "dur": 5.521, + "args": { + "External id": 974567,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936719634.596, "dur": 55.540, + "args": { + "External id": 974568,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936719725.311, "dur": 34.965, + "args": { + "External id": 974569,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936719771.977, "dur": 47.894, + "args": { + "External id": 974570,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936719830.225, "dur": 39.652, + "args": { + "External id": 974571,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936719896.828, "dur": 31.406, + "args": { + "External id": 974572,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936719936.387, "dur": 41.916, + "args": { + "External id": 974573,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936719998.591, "dur": 41.485, + "args": { + "External id": 974574,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13618 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 2338706, "tid": 2338706, + "ts": 6345936720259.742, "dur": 89.308, + "args": { + "External id": 974575,"Record function id": 0, "Ev Idx": 13619 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936720439.112, "dur": 55.409, + "args": { + "External id": 974576,"Record function id": 0, "Ev Idx": 13620 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.20)", "pid": 2338706, "tid": 2338706, + "ts": 6345936720504.988, "dur": 31461.168, + "args": { + "External id": 974577,"Record function id": 0, "Ev Idx": 13621 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338706, "tid": 2338706, + "ts": 6345936720513.251, "dur": 1107.919, + "args": { + "External id": 974578,"Record function id": 0, "Ev Idx": 13622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936720607.277, "dur": 10.882, + "args": { + "External id": 974579,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936720633.309, "dur": 37.377, + "args": { + "External id": 974580,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720639.726, "dur": 2.522, + "args": { + "External id": 974581,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720646.248, "dur": 0.540, + "args": { + "External id": 974582,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720647.853, "dur": 0.568, + "args": { + "External id": 974583,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720649.648, "dur": 0.650, + "args": { + "External id": 974584,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720653.033, "dur": 0.795, + "args": { + "External id": 974585,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720655.114, "dur": 0.340, + "args": { + "External id": 974586,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720656.517, "dur": 3.601, + "args": { + "External id": 974587,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720661.150, "dur": 0.570, + "args": { + "External id": 974588,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720662.705, "dur": 0.395, + "args": { + "External id": 974589,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936720683.493, "dur": 62.485, + "args": { + "External id": 974590,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936720783.752, "dur": 190.340, + "args": { + "External id": 974591,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936720796.217, "dur": 5.703, + "args": { + "External id": 974592,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936720807.864, "dur": 11.114, + "args": { + "External id": 974593,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936720812.679, "dur": 5.812, + "args": { + "External id": 974594,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720816.357, "dur": 0.562, + "args": { + "External id": 974595,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936720827.070, "dur": 68.645, + "args": { + "External id": 974596,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720828.967, "dur": 1.617, + "args": { + "External id": 974597,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720832.079, "dur": 0.590, + "args": { + "External id": 974598,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720836.023, "dur": 0.356, + "args": { + "External id": 974599,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720847.489, "dur": 2.454, + "args": { + "External id": 974600,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720852.304, "dur": 0.361, + "args": { + "External id": 974601,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720854.812, "dur": 0.445, + "args": { + "External id": 974602,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720858.050, "dur": 0.342, + "args": { + "External id": 974603,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720860.072, "dur": 0.668, + "args": { + "External id": 974604,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936720863.679, "dur": 23.673, + "args": { + "External id": 974605,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936720918.763, "dur": 44.425, + "args": { + "External id": 974606,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936721095.689, "dur": 412.460, + "args": { + "External id": 974607,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936721135.980, "dur": 365.856, + "args": { + "External id": 974608,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13652, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936721150.455, "dur": 345.334, + "args": { + "External id": 974609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936721535.121, "dur": 3.000, + "args": { + "External id": 974610,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13654, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338706, "tid": 2338706, + "ts": 6345936721646.803, "dur": 30072.306, + "args": { + "External id": 974611,"Record function id": 0, "Ev Idx": 13655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936721763.805, "dur": 7.442, + "args": { + "External id": 974612,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936721775.272, "dur": 1.351, + "args": { + "External id": 974613,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936721778.634, "dur": 3.292, + "args": { + "External id": 974614,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936721783.989, "dur": 0.919, + "args": { + "External id": 974615,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936721786.915, "dur": 1.218, + "args": { + "External id": 974616,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936721789.662, "dur": 1.297, + "args": { + "External id": 974617,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936721793.359, "dur": 1.162, + "args": { + "External id": 974618,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936721796.633, "dur": 2.269, + "args": { + "External id": 974619,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936721800.773, "dur": 0.882, + "args": { + "External id": 974620,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936721805.579, "dur": 0.792, + "args": { + "External id": 974621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936721826.591, "dur": 29839.534, + "args": { + "External id": 974622,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936721845.107, "dur": 29810.407, + "args": { + "External id": 974623,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936721863.923, "dur": 18.832, + "args": { + "External id": 974624,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936721886.704, "dur": 29720.812, + "args": { + "External id": 974625,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936721889.526, "dur": 29716.562, + "args": { + "External id": 974626,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936721894.885, "dur": 5.758, + "args": { + "External id": 974627,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936721902.659, "dur": 29699.154, + "args": { + "External id": 974628,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936751897.420, "dur": 40.765, + "args": { + "External id": 974629,"Sequence number": 10552283, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13673 + } + }, + { + "ph": "s", "id": 183, "pid": 2338706, "tid": 2338706, "ts": 6345936751897.420, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936751917.983, "dur": 14.410, + "args": { + "External id": 974630,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936751925.824, "dur": 6.321, + "args": { + "External id": 974631,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936752025.310, "dur": 120.244, + "args": { + "External id": 974632,"Record function id": 0, "Ev Idx": 13676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936752148.925, "dur": 1322.926, + "args": { + "External id": 974633,"Record function id": 0, "Ev Idx": 13677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936752194.488, "dur": 1259.757, + "args": { + "External id": 974634,"Sequence number": 10552284, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13678 + } + }, + { + "ph": "s", "id": 182, "pid": 2338706, "tid": 2338706, "ts": 6345936752194.488, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936752281.924, "dur": 60.834, + "args": { + "External id": 974635,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936752360.121, "dur": 120.384, + "args": { + "External id": 974636,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936752498.915, "dur": 44.118, + "args": { + "External id": 974637,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936752551.141, "dur": 34.048, + "args": { + "External id": 974638,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936752620.232, "dur": 30.789, + "args": { + "External id": 974639,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936752674.321, "dur": 20.800, + "args": { + "External id": 974640,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936752722.372, "dur": 156.053, + "args": { + "External id": 974641,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936752779.206, "dur": 15.673, + "args": { + "External id": 974642,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936752786.382, "dur": 7.541, + "args": { + "External id": 974643,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936752799.396, "dur": 4.800, + "args": { + "External id": 974644,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936752806.139, "dur": 1.177, + "args": { + "External id": 974645,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936752811.661, "dur": 5.392, + "args": { + "External id": 974646,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936752890.708, "dur": 57.640, + "args": { + "External id": 974647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936752983.283, "dur": 56.374, + "args": { + "External id": 974648,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936753093.484, "dur": 58.638, + "args": { + "External id": 974649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936753163.320, "dur": 41.956, + "args": { + "External id": 974650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936753232.568, "dur": 40.897, + "args": { + "External id": 974651,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936753280.740, "dur": 44.893, + "args": { + "External id": 974652,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936753350.047, "dur": 22.846, + "args": { + "External id": 974653,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13697 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.20)", "pid": 2338706, "tid": 2338706, + "ts": 6345936753546.701, "dur": 94.044, + "args": { + "External id": 974654,"Record function id": 0, "Ev Idx": 13698 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936753731.635, "dur": 54.602, + "args": { + "External id": 974655,"Record function id": 0, "Ev Idx": 13699 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.21)", "pid": 2338706, "tid": 2338706, + "ts": 6345936753796.393, "dur": 33118.688, + "args": { + "External id": 974656,"Record function id": 0, "Ev Idx": 13700 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338706, "tid": 2338706, + "ts": 6345936753805.308, "dur": 1141.935, + "args": { + "External id": 974657,"Record function id": 0, "Ev Idx": 13701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936753903.375, "dur": 10.738, + "args": { + "External id": 974658,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936753929.501, "dur": 41.072, + "args": { + "External id": 974659,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936753935.766, "dur": 2.620, + "args": { + "External id": 974660,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936753942.631, "dur": 0.617, + "args": { + "External id": 974661,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936753945.062, "dur": 0.378, + "args": { + "External id": 974662,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936753947.005, "dur": 0.470, + "args": { + "External id": 974663,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936753950.417, "dur": 0.416, + "args": { + "External id": 974664,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936753952.464, "dur": 0.407, + "args": { + "External id": 974665,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936753954.886, "dur": 3.575, + "args": { + "External id": 974666,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936753960.133, "dur": 0.335, + "args": { + "External id": 974667,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936753962.200, "dur": 0.367, + "args": { + "External id": 974668,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936753984.261, "dur": 124.345, + "args": { + "External id": 974669,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936754153.095, "dur": 149.825, + "args": { + "External id": 974670,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936754167.407, "dur": 6.297, + "args": { + "External id": 974671,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936754179.980, "dur": 13.129, + "args": { + "External id": 974672,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936754184.945, "dur": 7.609, + "args": { + "External id": 974673,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936754189.760, "dur": 0.968, + "args": { + "External id": 974674,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936754201.416, "dur": 32.455, + "args": { + "External id": 974675,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936754204.222, "dur": 1.918, + "args": { + "External id": 974676,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936754207.568, "dur": 0.492, + "args": { + "External id": 974677,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936754209.979, "dur": 0.541, + "args": { + "External id": 974678,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936754213.974, "dur": 2.703, + "args": { + "External id": 974679,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936754218.121, "dur": 0.378, + "args": { + "External id": 974680,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936754220.251, "dur": 0.341, + "args": { + "External id": 974681,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936754222.914, "dur": 0.378, + "args": { + "External id": 974682,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936754224.797, "dur": 0.557, + "args": { + "External id": 974683,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936754226.911, "dur": 1.830, + "args": { + "External id": 974684,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936754249.171, "dur": 43.496, + "args": { + "External id": 974685,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936754368.418, "dur": 468.122, + "args": { + "External id": 974686,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936754404.201, "dur": 426.294, + "args": { + "External id": 974687,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13731, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936754426.166, "dur": 397.776, + "args": { + "External id": 974688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936754862.603, "dur": 2.631, + "args": { + "External id": 974689,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13733, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338706, "tid": 2338706, + "ts": 6345936754971.317, "dur": 31686.339, + "args": { + "External id": 974690,"Record function id": 0, "Ev Idx": 13734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936755159.833, "dur": 8.417, + "args": { + "External id": 974691,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936755172.664, "dur": 1.339, + "args": { + "External id": 974692,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936755176.277, "dur": 3.500, + "args": { + "External id": 974693,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936755181.648, "dur": 1.101, + "args": { + "External id": 974694,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936755184.282, "dur": 0.811, + "args": { + "External id": 974695,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936755186.574, "dur": 1.015, + "args": { + "External id": 974696,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936755191.650, "dur": 0.913, + "args": { + "External id": 974697,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936755194.283, "dur": 1.987, + "args": { + "External id": 974698,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936755198.242, "dur": 0.859, + "args": { + "External id": 974699,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936755200.654, "dur": 0.874, + "args": { + "External id": 974700,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936755226.603, "dur": 31378.164, + "args": { + "External id": 974701,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936755247.325, "dur": 31347.694, + "args": { + "External id": 974702,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936755269.030, "dur": 19.790, + "args": { + "External id": 974703,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936755293.122, "dur": 31258.437, + "args": { + "External id": 974704,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936755296.066, "dur": 31253.778, + "args": { + "External id": 974705,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936755302.307, "dur": 5.931, + "args": { + "External id": 974706,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936755309.957, "dur": 31236.615, + "args": { + "External id": 974707,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936786840.555, "dur": 40.559, + "args": { + "External id": 974708,"Sequence number": 10552285, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13752 + } + }, + { + "ph": "s", "id": 181, "pid": 2338706, "tid": 2338706, "ts": 6345936786840.555, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936786862.900, "dur": 11.339, + "args": { + "External id": 974709,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936786868.224, "dur": 5.763, + "args": { + "External id": 974710,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936786966.375, "dur": 132.519, + "args": { + "External id": 974711,"Record function id": 0, "Ev Idx": 13755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936787102.527, "dur": 1303.528, + "args": { + "External id": 974712,"Record function id": 0, "Ev Idx": 13756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936787153.505, "dur": 1234.838, + "args": { + "External id": 974713,"Sequence number": 10552286, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13757 + } + }, + { + "ph": "s", "id": 180, "pid": 2338706, "tid": 2338706, "ts": 6345936787153.505, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936787238.143, "dur": 57.193, + "args": { + "External id": 974714,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936787311.850, "dur": 119.407, + "args": { + "External id": 974715,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936787446.829, "dur": 41.506, + "args": { + "External id": 974716,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936787497.669, "dur": 33.630, + "args": { + "External id": 974717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936787562.149, "dur": 29.743, + "args": { + "External id": 974718,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936787615.335, "dur": 19.748, + "args": { + "External id": 974719,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936787660.244, "dur": 153.651, + "args": { + "External id": 974720,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936787718.465, "dur": 14.723, + "args": { + "External id": 974721,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936787725.505, "dur": 6.661, + "args": { + "External id": 974722,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936787737.636, "dur": 4.689, + "args": { + "External id": 974723,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936787744.001, "dur": 1.199, + "args": { + "External id": 974724,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936787748.341, "dur": 5.431, + "args": { + "External id": 974725,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936787826.055, "dur": 58.751, + "args": { + "External id": 974726,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936787923.460, "dur": 36.330, + "args": { + "External id": 974727,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936787971.194, "dur": 71.680, + "args": { + "External id": 974728,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936788102.613, "dur": 53.206, + "args": { + "External id": 974729,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936788184.974, "dur": 31.910, + "args": { + "External id": 974730,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936788224.486, "dur": 42.887, + "args": { + "External id": 974731,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936788287.819, "dur": 19.712, + "args": { + "External id": 974732,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13776 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.21)", "pid": 2338706, "tid": 2338706, + "ts": 6345936788480.829, "dur": 91.732, + "args": { + "External id": 974733,"Record function id": 0, "Ev Idx": 13777 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936788662.525, "dur": 56.726, + "args": { + "External id": 974734,"Record function id": 0, "Ev Idx": 13778 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.22)", "pid": 2338706, "tid": 2338706, + "ts": 6345936788730.273, "dur": 31927.576, + "args": { + "External id": 974735,"Record function id": 0, "Ev Idx": 13779 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338706, "tid": 2338706, + "ts": 6345936788740.714, "dur": 1085.471, + "args": { + "External id": 974736,"Record function id": 0, "Ev Idx": 13780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936788836.678, "dur": 10.175, + "args": { + "External id": 974737,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936788862.155, "dur": 41.265, + "args": { + "External id": 974738,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936788868.311, "dur": 2.639, + "args": { + "External id": 974739,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936788875.010, "dur": 0.463, + "args": { + "External id": 974740,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936788877.470, "dur": 0.507, + "args": { + "External id": 974741,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936788879.710, "dur": 0.475, + "args": { + "External id": 974742,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936788883.020, "dur": 0.345, + "args": { + "External id": 974743,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936788885.150, "dur": 0.456, + "args": { + "External id": 974744,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936788887.290, "dur": 4.231, + "args": { + "External id": 974745,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936788893.859, "dur": 0.521, + "args": { + "External id": 974746,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936788896.215, "dur": 0.485, + "args": { + "External id": 974747,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936788915.938, "dur": 56.038, + "args": { + "External id": 974748,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936789031.411, "dur": 194.642, + "args": { + "External id": 974749,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936789046.153, "dur": 6.837, + "args": { + "External id": 974750,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936789098.159, "dur": 14.537, + "args": { + "External id": 974751,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936789104.117, "dur": 8.117, + "args": { + "External id": 974752,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936789108.860, "dur": 1.125, + "args": { + "External id": 974753,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936789122.196, "dur": 31.924, + "args": { + "External id": 974754,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936789125.239, "dur": 1.911, + "args": { + "External id": 974755,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936789128.853, "dur": 0.545, + "args": { + "External id": 974756,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936789131.304, "dur": 0.551, + "args": { + "External id": 974757,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936789134.756, "dur": 2.749, + "args": { + "External id": 974758,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936789138.959, "dur": 0.375, + "args": { + "External id": 974759,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936789140.748, "dur": 0.503, + "args": { + "External id": 974760,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936789143.942, "dur": 0.413, + "args": { + "External id": 974761,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936789145.921, "dur": 0.283, + "args": { + "External id": 974762,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936789147.609, "dur": 1.596, + "args": { + "External id": 974763,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936789168.227, "dur": 47.708, + "args": { + "External id": 974764,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936789294.049, "dur": 424.684, + "args": { + "External id": 974765,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936789339.268, "dur": 373.745, + "args": { + "External id": 974766,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13810, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936789351.713, "dur": 352.005, + "args": { + "External id": 974767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936789746.512, "dur": 2.860, + "args": { + "External id": 974768,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13812, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338706, "tid": 2338706, + "ts": 6345936789853.863, "dur": 30545.342, + "args": { + "External id": 974769,"Record function id": 0, "Ev Idx": 13813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936789967.476, "dur": 6.650, + "args": { + "External id": 974770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936789978.050, "dur": 1.191, + "args": { + "External id": 974771,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936789981.113, "dur": 3.340, + "args": { + "External id": 974772,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936789986.472, "dur": 0.904, + "args": { + "External id": 974773,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936789989.020, "dur": 1.140, + "args": { + "External id": 974774,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936789991.651, "dur": 0.925, + "args": { + "External id": 974775,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936789994.495, "dur": 1.071, + "args": { + "External id": 974776,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936789997.545, "dur": 2.278, + "args": { + "External id": 974777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936790001.521, "dur": 0.849, + "args": { + "External id": 974778,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936790005.699, "dur": 0.802, + "args": { + "External id": 974779,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936790051.116, "dur": 30289.866, + "args": { + "External id": 974780,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936790116.974, "dur": 30213.768, + "args": { + "External id": 974781,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936790134.543, "dur": 19.941, + "args": { + "External id": 974782,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936790159.139, "dur": 30128.746, + "args": { + "External id": 974783,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936790163.297, "dur": 30122.880, + "args": { + "External id": 974784,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936790169.550, "dur": 6.074, + "args": { + "External id": 974785,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936790177.438, "dur": 30105.366, + "args": { + "External id": 974786,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936820578.747, "dur": 45.428, + "args": { + "External id": 974787,"Sequence number": 10552287, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13831 + } + }, + { + "ph": "s", "id": 179, "pid": 2338706, "tid": 2338706, "ts": 6345936820578.747, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936820602.422, "dur": 14.771, + "args": { + "External id": 974788,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936820610.387, "dur": 6.484, + "args": { + "External id": 974789,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936820710.982, "dur": 84.131, + "args": { + "External id": 974790,"Record function id": 0, "Ev Idx": 13834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936820797.024, "dur": 1437.544, + "args": { + "External id": 974791,"Record function id": 0, "Ev Idx": 13835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936820846.987, "dur": 1368.875, + "args": { + "External id": 974792,"Sequence number": 10552288, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13836 + } + }, + { + "ph": "s", "id": 178, "pid": 2338706, "tid": 2338706, "ts": 6345936820846.987, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936820937.311, "dur": 58.464, + "args": { + "External id": 974793,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936821034.895, "dur": 163.429, + "args": { + "External id": 974794,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936821219.768, "dur": 46.343, + "args": { + "External id": 974795,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936821276.308, "dur": 35.911, + "args": { + "External id": 974796,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936821351.204, "dur": 34.926, + "args": { + "External id": 974797,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936821410.589, "dur": 23.914, + "args": { + "External id": 974798,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936821461.453, "dur": 157.105, + "args": { + "External id": 974799,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936821518.744, "dur": 15.872, + "args": { + "External id": 974800,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936821525.831, "dur": 7.798, + "args": { + "External id": 974801,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936821539.124, "dur": 5.009, + "args": { + "External id": 974802,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936821545.439, "dur": 1.075, + "args": { + "External id": 974803,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936821549.468, "dur": 5.720, + "args": { + "External id": 974804,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936821631.844, "dur": 59.369, + "args": { + "External id": 974805,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936821729.085, "dur": 35.722, + "args": { + "External id": 974806,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936821775.740, "dur": 50.619, + "args": { + "External id": 974807,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936821838.598, "dur": 46.796, + "args": { + "External id": 974808,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936821910.493, "dur": 48.321, + "args": { + "External id": 974809,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936821967.118, "dur": 59.571, + "args": { + "External id": 974810,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936822094.879, "dur": 26.102, + "args": { + "External id": 974811,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13855 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.22)", "pid": 2338706, "tid": 2338706, + "ts": 6345936822313.304, "dur": 96.094, + "args": { + "External id": 974812,"Record function id": 0, "Ev Idx": 13856 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936822499.109, "dur": 51.964, + "args": { + "External id": 974813,"Record function id": 0, "Ev Idx": 13857 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.23)", "pid": 2338706, "tid": 2338706, + "ts": 6345936822562.529, "dur": 31114.754, + "args": { + "External id": 974814,"Record function id": 0, "Ev Idx": 13858 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338706, "tid": 2338706, + "ts": 6345936822572.344, "dur": 1088.646, + "args": { + "External id": 974815,"Record function id": 0, "Ev Idx": 13859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936822665.582, "dur": 11.568, + "args": { + "External id": 974816,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936822692.877, "dur": 42.194, + "args": { + "External id": 974817,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822699.627, "dur": 2.557, + "args": { + "External id": 974818,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822706.664, "dur": 0.518, + "args": { + "External id": 974819,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822709.043, "dur": 0.503, + "args": { + "External id": 974820,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822711.565, "dur": 0.373, + "args": { + "External id": 974821,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822715.007, "dur": 0.609, + "args": { + "External id": 974822,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822717.825, "dur": 0.370, + "args": { + "External id": 974823,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822719.684, "dur": 4.063, + "args": { + "External id": 974824,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822725.393, "dur": 0.579, + "args": { + "External id": 974825,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822728.211, "dur": 0.543, + "args": { + "External id": 974826,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936822748.283, "dur": 64.939, + "args": { + "External id": 974827,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936822852.603, "dur": 140.589, + "args": { + "External id": 974828,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936822866.201, "dur": 4.324, + "args": { + "External id": 974829,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936822876.509, "dur": 12.258, + "args": { + "External id": 974830,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936822881.890, "dur": 6.333, + "args": { + "External id": 974831,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822886.111, "dur": 0.646, + "args": { + "External id": 974832,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936822896.423, "dur": 31.874, + "args": { + "External id": 974833,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822899.264, "dur": 1.458, + "args": { + "External id": 974834,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822902.579, "dur": 0.436, + "args": { + "External id": 974835,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822904.530, "dur": 0.440, + "args": { + "External id": 974836,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822908.050, "dur": 2.700, + "args": { + "External id": 974837,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822912.573, "dur": 0.516, + "args": { + "External id": 974838,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822914.653, "dur": 0.444, + "args": { + "External id": 974839,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822918.009, "dur": 0.324, + "args": { + "External id": 974840,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822919.686, "dur": 0.638, + "args": { + "External id": 974841,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936822922.114, "dur": 2.098, + "args": { + "External id": 974842,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936822942.952, "dur": 41.288, + "args": { + "External id": 974843,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936823122.812, "dur": 427.067, + "args": { + "External id": 974844,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936823162.473, "dur": 381.881, + "args": { + "External id": 974845,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13889, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936823176.161, "dur": 361.510, + "args": { + "External id": 974846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936823575.814, "dur": 2.881, + "args": { + "External id": 974847,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13891, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338706, "tid": 2338706, + "ts": 6345936823686.229, "dur": 29720.775, + "args": { + "External id": 974848,"Record function id": 0, "Ev Idx": 13892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936823856.841, "dur": 8.264, + "args": { + "External id": 974849,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936823870.004, "dur": 1.026, + "args": { + "External id": 974850,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936823873.099, "dur": 3.034, + "args": { + "External id": 974851,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936823880.574, "dur": 0.815, + "args": { + "External id": 974852,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936823882.948, "dur": 1.037, + "args": { + "External id": 974853,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936823885.542, "dur": 0.877, + "args": { + "External id": 974854,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936823890.029, "dur": 0.680, + "args": { + "External id": 974855,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936823894.196, "dur": 2.092, + "args": { + "External id": 974856,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936823898.053, "dur": 0.911, + "args": { + "External id": 974857,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936823900.749, "dur": 0.889, + "args": { + "External id": 974858,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936823925.402, "dur": 29426.541, + "args": { + "External id": 974859,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936823944.599, "dur": 29397.331, + "args": { + "External id": 974860,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936823961.901, "dur": 19.040, + "args": { + "External id": 974861,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936823987.432, "dur": 29308.637, + "args": { + "External id": 974862,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936823990.214, "dur": 29304.061, + "args": { + "External id": 974863,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936823996.001, "dur": 6.443, + "args": { + "External id": 974864,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936824004.310, "dur": 29286.503, + "args": { + "External id": 974865,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936853596.445, "dur": 45.879, + "args": { + "External id": 974866,"Sequence number": 10552289, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13910 + } + }, + { + "ph": "s", "id": 177, "pid": 2338706, "tid": 2338706, "ts": 6345936853596.445, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936853623.186, "dur": 12.300, + "args": { + "External id": 974867,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936853628.917, "dur": 6.290, + "args": { + "External id": 974868,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936853726.517, "dur": 83.849, + "args": { + "External id": 974869,"Record function id": 0, "Ev Idx": 13913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936853811.752, "dur": 1395.816, + "args": { + "External id": 974870,"Record function id": 0, "Ev Idx": 13914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936853862.210, "dur": 1328.864, + "args": { + "External id": 974871,"Sequence number": 10552290, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13915 + } + }, + { + "ph": "s", "id": 176, "pid": 2338706, "tid": 2338706, "ts": 6345936853862.210, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936853954.437, "dur": 77.874, + "args": { + "External id": 974872,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936854094.539, "dur": 124.236, + "args": { + "External id": 974873,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936854239.082, "dur": 49.075, + "args": { + "External id": 974874,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936854299.655, "dur": 38.431, + "args": { + "External id": 974875,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936854375.641, "dur": 36.144, + "args": { + "External id": 974876,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936854437.140, "dur": 22.278, + "args": { + "External id": 974877,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936854485.113, "dur": 155.829, + "args": { + "External id": 974878,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936854542.684, "dur": 16.202, + "args": { + "External id": 974879,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936854550.475, "dur": 7.339, + "args": { + "External id": 974880,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936854563.606, "dur": 5.040, + "args": { + "External id": 974881,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936854570.308, "dur": 1.178, + "args": { + "External id": 974882,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936854574.457, "dur": 5.789, + "args": { + "External id": 974883,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936854653.230, "dur": 54.340, + "args": { + "External id": 974884,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936854742.130, "dur": 33.887, + "args": { + "External id": 974885,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936854787.644, "dur": 48.657, + "args": { + "External id": 974886,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936854847.305, "dur": 40.016, + "args": { + "External id": 974887,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936854912.455, "dur": 32.757, + "args": { + "External id": 974888,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936854952.578, "dur": 41.258, + "args": { + "External id": 974889,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936855038.535, "dur": 61.960, + "args": { + "External id": 974890,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13934 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.23)", "pid": 2338706, "tid": 2338706, + "ts": 6345936855286.117, "dur": 94.949, + "args": { + "External id": 974891,"Record function id": 0, "Ev Idx": 13935 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936855473.111, "dur": 56.628, + "args": { + "External id": 974892,"Record function id": 0, "Ev Idx": 13936 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.24)", "pid": 2338706, "tid": 2338706, + "ts": 6345936855541.045, "dur": 31178.873, + "args": { + "External id": 974893,"Record function id": 0, "Ev Idx": 13937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338706, "tid": 2338706, + "ts": 6345936855551.217, "dur": 1068.010, + "args": { + "External id": 974894,"Record function id": 0, "Ev Idx": 13938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936855647.193, "dur": 11.048, + "args": { + "External id": 974895,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936855675.192, "dur": 39.746, + "args": { + "External id": 974896,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855681.487, "dur": 2.581, + "args": { + "External id": 974897,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855688.528, "dur": 0.719, + "args": { + "External id": 974898,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855690.844, "dur": 0.369, + "args": { + "External id": 974899,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855692.709, "dur": 0.827, + "args": { + "External id": 974900,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855696.092, "dur": 0.526, + "args": { + "External id": 974901,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855698.390, "dur": 0.654, + "args": { + "External id": 974902,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855700.535, "dur": 2.825, + "args": { + "External id": 974903,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855705.462, "dur": 0.897, + "args": { + "External id": 974904,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855708.077, "dur": 0.609, + "args": { + "External id": 974905,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936855729.094, "dur": 62.214, + "args": { + "External id": 974906,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936855829.637, "dur": 138.181, + "args": { + "External id": 974907,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936855843.602, "dur": 5.364, + "args": { + "External id": 974908,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936855855.202, "dur": 12.702, + "args": { + "External id": 974909,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936855860.498, "dur": 6.888, + "args": { + "External id": 974910,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855865.322, "dur": 0.673, + "args": { + "External id": 974911,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936855875.551, "dur": 32.511, + "args": { + "External id": 974912,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855878.232, "dur": 2.122, + "args": { + "External id": 974913,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855882.493, "dur": 0.627, + "args": { + "External id": 974914,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855884.791, "dur": 0.582, + "args": { + "External id": 974915,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855889.162, "dur": 2.529, + "args": { + "External id": 974916,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855893.254, "dur": 0.297, + "args": { + "External id": 974917,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855895.176, "dur": 0.562, + "args": { + "External id": 974918,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855898.591, "dur": 0.287, + "args": { + "External id": 974919,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855900.622, "dur": 0.374, + "args": { + "External id": 974920,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936855902.202, "dur": 1.634, + "args": { + "External id": 974921,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936855920.458, "dur": 38.023, + "args": { + "External id": 974922,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936856090.832, "dur": 416.621, + "args": { + "External id": 974923,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936856131.311, "dur": 370.695, + "args": { + "External id": 974924,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13968, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936856144.805, "dur": 351.067, + "args": { + "External id": 974925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936856534.306, "dur": 2.843, + "args": { + "External id": 974926,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13970, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338706, "tid": 2338706, + "ts": 6345936856643.552, "dur": 29853.966, + "args": { + "External id": 974927,"Record function id": 0, "Ev Idx": 13971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936856759.929, "dur": 7.130, + "args": { + "External id": 974928,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936856771.287, "dur": 1.336, + "args": { + "External id": 974929,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936856774.642, "dur": 3.396, + "args": { + "External id": 974930,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936856779.849, "dur": 0.852, + "args": { + "External id": 974931,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936856794.454, "dur": 0.923, + "args": { + "External id": 974932,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936856798.709, "dur": 0.648, + "args": { + "External id": 974933,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936856803.514, "dur": 0.841, + "args": { + "External id": 974934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936856806.165, "dur": 2.012, + "args": { + "External id": 974935,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936856809.794, "dur": 0.838, + "args": { + "External id": 974936,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936856812.171, "dur": 1.166, + "args": { + "External id": 974937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936856837.833, "dur": 29608.920, + "args": { + "External id": 974938,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936856856.413, "dur": 29580.154, + "args": { + "External id": 974939,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936856871.925, "dur": 19.377, + "args": { + "External id": 974940,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936856895.726, "dur": 29499.776, + "args": { + "External id": 974941,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936856898.675, "dur": 29495.304, + "args": { + "External id": 974942,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936856904.198, "dur": 6.187, + "args": { + "External id": 974943,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936856912.177, "dur": 29477.990, + "args": { + "External id": 974944,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936886657.578, "dur": 35.512, + "args": { + "External id": 974945,"Sequence number": 10552291, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13989 + } + }, + { + "ph": "s", "id": 175, "pid": 2338706, "tid": 2338706, "ts": 6345936886657.578, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936886677.570, "dur": 10.109, + "args": { + "External id": 974946,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936886682.514, "dur": 4.940, + "args": { + "External id": 974947,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936886766.939, "dur": 80.488, + "args": { + "External id": 974948,"Record function id": 0, "Ev Idx": 13992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936886849.599, "dur": 1366.175, + "args": { + "External id": 974949,"Record function id": 0, "Ev Idx": 13993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936886895.149, "dur": 1304.112, + "args": { + "External id": 974950,"Sequence number": 10552292, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13994 + } + }, + { + "ph": "s", "id": 174, "pid": 2338706, "tid": 2338706, "ts": 6345936886895.149, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936886976.260, "dur": 73.670, + "args": { + "External id": 974951,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936887107.891, "dur": 119.367, + "args": { + "External id": 974952,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936887243.740, "dur": 44.364, + "args": { + "External id": 974953,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936887297.899, "dur": 36.028, + "args": { + "External id": 974954,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936887372.332, "dur": 36.339, + "args": { + "External id": 974955,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936887432.564, "dur": 22.085, + "args": { + "External id": 974956,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936887480.024, "dur": 151.988, + "args": { + "External id": 974957,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936887537.268, "dur": 14.773, + "args": { + "External id": 974958,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936887544.788, "dur": 6.265, + "args": { + "External id": 974959,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936887555.433, "dur": 4.576, + "args": { + "External id": 974960,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936887561.531, "dur": 1.363, + "args": { + "External id": 974961,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936887565.803, "dur": 5.216, + "args": { + "External id": 974962,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936887643.799, "dur": 58.550, + "args": { + "External id": 974963,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936887737.905, "dur": 36.610, + "args": { + "External id": 974964,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936887786.113, "dur": 49.749, + "args": { + "External id": 974965,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936887846.677, "dur": 41.152, + "args": { + "External id": 974966,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936887914.114, "dur": 37.448, + "args": { + "External id": 974967,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 14011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936887959.730, "dur": 43.356, + "args": { + "External id": 974968,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 14012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936888047.623, "dur": 65.891, + "args": { + "External id": 974969,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 14013 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.24)", "pid": 2338706, "tid": 2338706, + "ts": 6345936888294.170, "dur": 96.289, + "args": { + "External id": 974970,"Record function id": 0, "Ev Idx": 14014 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345936888482.510, "dur": 55.342, + "args": { + "External id": 974971,"Record function id": 0, "Ev Idx": 14015 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.25)", "pid": 2338706, "tid": 2338706, + "ts": 6345936888549.626, "dur": 32762.209, + "args": { + "External id": 974972,"Record function id": 0, "Ev Idx": 14016 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2338706, "tid": 2338706, + "ts": 6345936888559.172, "dur": 1071.391, + "args": { + "External id": 974973,"Record function id": 0, "Ev Idx": 14017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936888657.440, "dur": 11.404, + "args": { + "External id": 974974,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936888685.521, "dur": 45.388, + "args": { + "External id": 974975,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 14019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888692.097, "dur": 2.748, + "args": { + "External id": 974976,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888699.251, "dur": 0.506, + "args": { + "External id": 974977,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888701.633, "dur": 0.612, + "args": { + "External id": 974978,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888708.097, "dur": 0.671, + "args": { + "External id": 974979,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888712.017, "dur": 0.673, + "args": { + "External id": 974980,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888714.147, "dur": 0.664, + "args": { + "External id": 974981,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888716.319, "dur": 3.600, + "args": { + "External id": 974982,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888721.378, "dur": 0.557, + "args": { + "External id": 974983,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888723.707, "dur": 0.289, + "args": { + "External id": 974984,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936888743.864, "dur": 59.758, + "args": { + "External id": 974985,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 14029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345936888841.565, "dur": 141.779, + "args": { + "External id": 974986,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 14030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936888853.880, "dur": 5.438, + "args": { + "External id": 974987,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345936888865.476, "dur": 11.996, + "args": { + "External id": 974988,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 14032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936888870.510, "dur": 6.450, + "args": { + "External id": 974989,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 14033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888874.885, "dur": 0.674, + "args": { + "External id": 974990,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 14034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345936888885.605, "dur": 30.374, + "args": { + "External id": 974991,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 14035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888888.287, "dur": 1.568, + "args": { + "External id": 974992,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888891.649, "dur": 0.493, + "args": { + "External id": 974993,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888893.724, "dur": 0.465, + "args": { + "External id": 974994,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888897.027, "dur": 2.783, + "args": { + "External id": 974995,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888901.289, "dur": 0.381, + "args": { + "External id": 974996,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888903.238, "dur": 0.413, + "args": { + "External id": 974997,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888906.435, "dur": 0.364, + "args": { + "External id": 974998,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888908.425, "dur": 0.466, + "args": { + "External id": 974999,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936888910.492, "dur": 1.217, + "args": { + "External id": 975000,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936888931.415, "dur": 42.520, + "args": { + "External id": 975001,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 14045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345936889107.211, "dur": 411.546, + "args": { + "External id": 975002,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 14046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936889144.477, "dur": 368.185, + "args": { + "External id": 975003,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 14047, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345936889158.591, "dur": 348.367, + "args": { + "External id": 975004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 14048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345936889546.795, "dur": 2.762, + "args": { + "External id": 975005,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 14049, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2338706, "tid": 2338706, + "ts": 6345936889654.454, "dur": 31350.638, + "args": { + "External id": 975006,"Record function id": 0, "Ev Idx": 14050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936889774.382, "dur": 7.404, + "args": { + "External id": 975007,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 14051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936889786.277, "dur": 1.072, + "args": { + "External id": 975008,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936889789.805, "dur": 3.126, + "args": { + "External id": 975009,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 14053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936889794.855, "dur": 1.106, + "args": { + "External id": 975010,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 14054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936889797.470, "dur": 0.926, + "args": { + "External id": 975011,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 14055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936889800.031, "dur": 1.132, + "args": { + "External id": 975012,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 14056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936889803.434, "dur": 0.824, + "args": { + "External id": 975013,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936889806.214, "dur": 2.267, + "args": { + "External id": 975014,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 14058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936889810.501, "dur": 0.998, + "args": { + "External id": 975015,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 14059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936889815.314, "dur": 0.836, + "args": { + "External id": 975016,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 14060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936889837.612, "dur": 31112.014, + "args": { + "External id": 975017,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 14061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936889855.640, "dur": 31083.691, + "args": { + "External id": 975018,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 14062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936889879.113, "dur": 19.581, + "args": { + "External id": 975019,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936889902.945, "dur": 30988.734, + "args": { + "External id": 975020,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 14064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936889905.758, "dur": 30984.107, + "args": { + "External id": 975021,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 14065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936889911.396, "dur": 5.930, + "args": { + "External id": 975022,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936889919.363, "dur": 30966.740, + "args": { + "External id": 975023,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 14067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936921231.246, "dur": 44.230, + "args": { + "External id": 975024,"Sequence number": 10552293, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 14068 + } + }, + { + "ph": "s", "id": 173, "pid": 2338706, "tid": 2338706, "ts": 6345936921231.246, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936921255.548, "dur": 12.769, + "args": { + "External id": 975025,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 14069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936921261.125, "dur": 6.758, + "args": { + "External id": 975026,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936921366.137, "dur": 84.098, + "args": { + "External id": 975027,"Record function id": 0, "Ev Idx": 14071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345936921451.831, "dur": 1345.281, + "args": { + "External id": 975028,"Record function id": 0, "Ev Idx": 14072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936921499.253, "dur": 1281.611, + "args": { + "External id": 975029,"Sequence number": 10552294, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 14073 + } + }, + { + "ph": "s", "id": 172, "pid": 2338706, "tid": 2338706, "ts": 6345936921499.253, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936921587.552, "dur": 67.473, + "args": { + "External id": 975030,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936921673.519, "dur": 120.761, + "args": { + "External id": 975031,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936921809.225, "dur": 43.903, + "args": { + "External id": 975032,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 14076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936921862.492, "dur": 34.418, + "args": { + "External id": 975033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 14077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936921927.061, "dur": 32.633, + "args": { + "External id": 975034,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345936921985.665, "dur": 43.578, + "args": { + "External id": 975035,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936922099.573, "dur": 168.762, + "args": { + "External id": 975036,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936922160.102, "dur": 18.883, + "args": { + "External id": 975037,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936922168.426, "dur": 8.693, + "args": { + "External id": 975038,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936922182.874, "dur": 4.673, + "args": { + "External id": 975039,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936922189.160, "dur": 1.706, + "args": { + "External id": 975040,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936922194.539, "dur": 6.085, + "args": { + "External id": 975041,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936922282.733, "dur": 67.614, + "args": { + "External id": 975042,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345936922392.774, "dur": 35.479, + "args": { + "External id": 975043,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936922439.613, "dur": 51.755, + "args": { + "External id": 975044,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936922503.468, "dur": 41.113, + "args": { + "External id": 975045,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936922572.345, "dur": 32.050, + "args": { + "External id": 975046,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 14090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936922611.648, "dur": 42.335, + "args": { + "External id": 975047,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 14091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345936922673.398, "dur": 22.829, + "args": { + "External id": 975048,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 14092 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.25)", "pid": 2338706, "tid": 2338706, + "ts": 6345936922873.168, "dur": 40.912, + "args": { + "External id": 975049,"Record function id": 0, "Ev Idx": 14093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936923141.920, "dur": 336.239, + "args": { + "External id": 975050,"Sequence number": 10552295, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14094 + } + }, + { + "ph": "s", "id": 171, "pid": 2338706, "tid": 2338706, "ts": 6345936923141.920, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936923182.690, "dur": 10.100, + "args": { + "External id": 975051,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936923185.213, "dur": 7.065, + "args": { + "External id": 975052,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936923204.554, "dur": 15.851, + "args": { + "External id": 975053,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936923209.138, "dur": 10.577, + "args": { + "External id": 975054,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936923230.529, "dur": 5.494, + "args": { + "External id": 975055,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936923454.144, "dur": 6.682, + "args": { + "External id": 975056,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936923457.389, "dur": 3.158, + "args": { + "External id": 975057,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936923511.899, "dur": 158.592, + "args": { + "External id": 975058,"Sequence number": 10552296, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936923514.545, "dur": 17.127, + "args": { + "External id": 975059,"Sequence number": 10552296, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14103 + } + }, + { + "ph": "s", "id": 170, "pid": 2338706, "tid": 2338706, "ts": 6345936923514.545, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936923519.871, "dur": 9.554, + "args": { + "External id": 975060,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936923526.604, "dur": 2.327, + "args": { + "External id": 975061,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936923534.535, "dur": 135.617, + "args": { + "External id": 975062,"Sequence number": 10552297, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936923538.018, "dur": 5.279, + "args": { + "External id": 975063,"Sequence number": 10552297, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936923539.359, "dur": 3.763, + "args": { + "External id": 975064,"Sequence number": 10552297, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14108 + } + }, + { + "ph": "s", "id": 169, "pid": 2338706, "tid": 2338706, "ts": 6345936923539.359, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936923545.511, "dur": 110.123, + "args": { + "External id": 975065,"Sequence number": 10552298, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14109 + } + }, + { + "ph": "s", "id": 168, "pid": 2338706, "tid": 2338706, "ts": 6345936923545.511, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936923659.826, "dur": 9.125, + "args": { + "External id": 975066,"Sequence number": 10552299, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14110 + } + }, + { + "ph": "s", "id": 167, "pid": 2338706, "tid": 2338706, "ts": 6345936923659.826, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936923686.225, "dur": 74.986, + "args": { + "External id": 975067,"Sequence number": 10552300, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936923687.226, "dur": 9.172, + "args": { + "External id": 975068,"Sequence number": 10552300, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14112 + } + }, + { + "ph": "s", "id": 166, "pid": 2338706, "tid": 2338706, "ts": 6345936923687.226, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936923689.349, "dur": 5.738, + "args": { + "External id": 975069,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936923693.968, "dur": 0.958, + "args": { + "External id": 975070,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936923697.253, "dur": 63.671, + "args": { + "External id": 975071,"Sequence number": 10552301, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936923698.583, "dur": 5.124, + "args": { + "External id": 975072,"Sequence number": 10552301, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936923699.569, "dur": 3.936, + "args": { + "External id": 975073,"Sequence number": 10552301, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14117 + } + }, + { + "ph": "s", "id": 165, "pid": 2338706, "tid": 2338706, "ts": 6345936923699.569, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936923704.579, "dur": 48.906, + "args": { + "External id": 975074,"Sequence number": 10552302, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14118 + } + }, + { + "ph": "s", "id": 164, "pid": 2338706, "tid": 2338706, "ts": 6345936923704.579, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936923755.773, "dur": 4.525, + "args": { + "External id": 975075,"Sequence number": 10552303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14119 + } + }, + { + "ph": "s", "id": 163, "pid": 2338706, "tid": 2338706, "ts": 6345936923755.773, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936923770.217, "dur": 71.000, + "args": { + "External id": 975076,"Sequence number": 10552304, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936923771.152, "dur": 6.582, + "args": { + "External id": 975077,"Sequence number": 10552304, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14121 + } + }, + { + "ph": "s", "id": 162, "pid": 2338706, "tid": 2338706, "ts": 6345936923771.152, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936923773.038, "dur": 3.560, + "args": { + "External id": 975078,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936923775.515, "dur": 0.917, + "args": { + "External id": 975079,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936923780.905, "dur": 60.045, + "args": { + "External id": 975080,"Sequence number": 10552305, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936923782.194, "dur": 4.959, + "args": { + "External id": 975081,"Sequence number": 10552305, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936923783.362, "dur": 3.635, + "args": { + "External id": 975082,"Sequence number": 10552305, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14126 + } + }, + { + "ph": "s", "id": 161, "pid": 2338706, "tid": 2338706, "ts": 6345936923783.362, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936923787.657, "dur": 44.021, + "args": { + "External id": 975083,"Sequence number": 10552306, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14127 + } + }, + { + "ph": "s", "id": 160, "pid": 2338706, "tid": 2338706, "ts": 6345936923787.657, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936923834.107, "dur": 6.269, + "args": { + "External id": 975084,"Sequence number": 10552307, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14128 + } + }, + { + "ph": "s", "id": 159, "pid": 2338706, "tid": 2338706, "ts": 6345936923834.107, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936923867.089, "dur": 4.410, + "args": { + "External id": 975085,"Sequence number": 10552308, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936923868.243, "dur": 3.078, + "args": { + "External id": 975086,"Sequence number": 10552308, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14130 + } + }, + { + "ph": "s", "id": 158, "pid": 2338706, "tid": 2338706, "ts": 6345936923868.243, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936923880.944, "dur": 3.929, + "args": { + "External id": 975087,"Sequence number": 10552309, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936923882.216, "dur": 2.521, + "args": { + "External id": 975088,"Sequence number": 10552309, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14132 + } + }, + { + "ph": "s", "id": 157, "pid": 2338706, "tid": 2338706, "ts": 6345936923882.216, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936923892.290, "dur": 4.923, + "args": { + "External id": 975089,"Sequence number": 10552310, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936923893.686, "dur": 3.349, + "args": { + "External id": 975090,"Sequence number": 10552310, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14134 + } + }, + { + "ph": "s", "id": 156, "pid": 2338706, "tid": 2338706, "ts": 6345936923893.686, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936923938.025, "dur": 274.164, + "args": { + "External id": 975091,"Sequence number": 10552311, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14135 + } + }, + { + "ph": "s", "id": 155, "pid": 2338706, "tid": 2338706, "ts": 6345936923938.025, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936923964.539, "dur": 10.286, + "args": { + "External id": 975092,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936923968.627, "dur": 5.662, + "args": { + "External id": 975093,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936924231.309, "dur": 151.274, + "args": { + "External id": 975094,"Sequence number": 10552312, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14138 + } + }, + { + "ph": "s", "id": 154, "pid": 2338706, "tid": 2338706, "ts": 6345936924231.309, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936924250.549, "dur": 10.710, + "args": { + "External id": 975095,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936924254.160, "dur": 6.683, + "args": { + "External id": 975096,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338706, "tid": 2338706, + "ts": 6345936924419.134, "dur": 212.886, + "args": { + "External id": 975097,"Sequence number": 10552313, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14141 + } + }, + { + "ph": "s", "id": 153, "pid": 2338706, "tid": 2338706, "ts": 6345936924419.134, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936924452.234, "dur": 147.215, + "args": { + "External id": 975098,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936924511.192, "dur": 9.786, + "args": { + "External id": 975099,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936924514.576, "dur": 5.858, + "args": { + "External id": 975100,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936924524.333, "dur": 5.040, + "args": { + "External id": 975101,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936924531.265, "dur": 1.604, + "args": { + "External id": 975102,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936924537.092, "dur": 3.597, + "args": { + "External id": 975103,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2338706, + "ts": 6345936924615.456, "dur": 5.807, + "args": { + "External id": 975104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936924638.494, "dur": 7.647, + "args": { + "External id": 975105,"Sequence number": 10552314, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936924640.758, "dur": 5.149, + "args": { + "External id": 975106,"Sequence number": 10552314, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14150 + } + }, + { + "ph": "s", "id": 152, "pid": 2338706, "tid": 2338706, "ts": 6345936924640.758, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936924662.031, "dur": 142.821, + "args": { + "External id": 975107,"Sequence number": 10552315, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936924666.004, "dur": 15.403, + "args": { + "External id": 975108,"Sequence number": 10552315, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14152 + } + }, + { + "ph": "s", "id": 151, "pid": 2338706, "tid": 2338706, "ts": 6345936924666.004, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936924669.174, "dur": 11.085, + "args": { + "External id": 975109,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936924677.649, "dur": 2.305, + "args": { + "External id": 975110,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936924682.824, "dur": 121.655, + "args": { + "External id": 975111,"Sequence number": 10552316, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936924685.409, "dur": 8.320, + "args": { + "External id": 975112,"Sequence number": 10552316, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936924689.473, "dur": 4.075, + "args": { + "External id": 975113,"Sequence number": 10552316, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14157 + } + }, + { + "ph": "s", "id": 150, "pid": 2338706, "tid": 2338706, "ts": 6345936924689.473, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936924694.941, "dur": 98.941, + "args": { + "External id": 975114,"Sequence number": 10552317, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14158 + } + }, + { + "ph": "s", "id": 149, "pid": 2338706, "tid": 2338706, "ts": 6345936924694.941, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936924797.558, "dur": 6.008, + "args": { + "External id": 975115,"Sequence number": 10552318, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14159 + } + }, + { + "ph": "s", "id": 148, "pid": 2338706, "tid": 2338706, "ts": 6345936924797.558, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936924846.835, "dur": 329.793, + "args": { + "External id": 975116,"Sequence number": 10552319, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14160 + } + }, + { + "ph": "s", "id": 147, "pid": 2338706, "tid": 2338706, "ts": 6345936924846.835, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936924873.159, "dur": 3.071, + "args": { + "External id": 975117,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936924874.288, "dur": 1.765, + "args": { + "External id": 975118,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936924881.779, "dur": 6.180, + "args": { + "External id": 975119,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936924883.039, "dur": 4.770, + "args": { + "External id": 975120,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936924884.025, "dur": 3.667, + "args": { + "External id": 975121,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936924896.487, "dur": 9.939, + "args": { + "External id": 975122,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936924901.179, "dur": 4.887, + "args": { + "External id": 975123,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936924913.721, "dur": 3.347, + "args": { + "External id": 975124,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936924921.302, "dur": 4.126, + "args": { + "External id": 975125,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936925142.438, "dur": 5.649, + "args": { + "External id": 975126,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936925144.264, "dur": 3.453, + "args": { + "External id": 975127,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936925152.478, "dur": 5.437, + "args": { + "External id": 975128,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936925156.546, "dur": 1.266, + "args": { + "External id": 975129,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936925201.003, "dur": 126.062, + "args": { + "External id": 975130,"Sequence number": 10552320, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936925202.559, "dur": 9.851, + "args": { + "External id": 975131,"Sequence number": 10552320, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14175 + } + }, + { + "ph": "s", "id": 146, "pid": 2338706, "tid": 2338706, "ts": 6345936925202.559, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936925205.886, "dur": 5.120, + "args": { + "External id": 975132,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936925208.809, "dur": 1.854, + "args": { + "External id": 975133,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936925213.429, "dur": 113.282, + "args": { + "External id": 975134,"Sequence number": 10552321, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936925217.796, "dur": 3.582, + "args": { + "External id": 975135,"Sequence number": 10552321, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936925218.739, "dur": 2.497, + "args": { + "External id": 975136,"Sequence number": 10552321, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14180 + } + }, + { + "ph": "s", "id": 145, "pid": 2338706, "tid": 2338706, "ts": 6345936925218.739, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936925222.766, "dur": 91.820, + "args": { + "External id": 975137,"Sequence number": 10552322, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14181 + } + }, + { + "ph": "s", "id": 144, "pid": 2338706, "tid": 2338706, "ts": 6345936925222.766, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936925318.129, "dur": 7.803, + "args": { + "External id": 975138,"Sequence number": 10552323, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14182 + } + }, + { + "ph": "s", "id": 143, "pid": 2338706, "tid": 2338706, "ts": 6345936925318.129, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936925340.679, "dur": 81.411, + "args": { + "External id": 975139,"Sequence number": 10552324, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936925341.566, "dur": 7.067, + "args": { + "External id": 975140,"Sequence number": 10552324, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14184 + } + }, + { + "ph": "s", "id": 142, "pid": 2338706, "tid": 2338706, "ts": 6345936925341.566, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936925343.463, "dur": 3.691, + "args": { + "External id": 975141,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936925345.985, "dur": 0.976, + "args": { + "External id": 975142,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936925349.458, "dur": 72.233, + "args": { + "External id": 975143,"Sequence number": 10552325, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936925352.954, "dur": 6.602, + "args": { + "External id": 975144,"Sequence number": 10552325, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936925354.238, "dur": 5.141, + "args": { + "External id": 975145,"Sequence number": 10552325, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14189 + } + }, + { + "ph": "s", "id": 141, "pid": 2338706, "tid": 2338706, "ts": 6345936925354.238, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936925360.243, "dur": 54.830, + "args": { + "External id": 975146,"Sequence number": 10552326, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14190 + } + }, + { + "ph": "s", "id": 140, "pid": 2338706, "tid": 2338706, "ts": 6345936925360.243, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936925417.529, "dur": 3.694, + "args": { + "External id": 975147,"Sequence number": 10552327, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14191 + } + }, + { + "ph": "s", "id": 139, "pid": 2338706, "tid": 2338706, "ts": 6345936925417.529, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936925451.425, "dur": 179.947, + "args": { + "External id": 975148,"Sequence number": 10552328, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14192 + } + }, + { + "ph": "s", "id": 138, "pid": 2338706, "tid": 2338706, "ts": 6345936925451.425, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936925501.687, "dur": 5.324, + "args": { + "External id": 975149,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936925545.396, "dur": 70.764, + "args": { + "External id": 975150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936925546.142, "dur": 6.283, + "args": { + "External id": 975151,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936925547.705, "dur": 3.760, + "args": { + "External id": 975152,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936925550.183, "dur": 1.056, + "args": { + "External id": 975153,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936925553.487, "dur": 62.315, + "args": { + "External id": 975154,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936925556.784, "dur": 2.444, + "args": { + "External id": 975155,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936925557.938, "dur": 1.172, + "args": { + "External id": 975156,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936925560.011, "dur": 51.626, + "args": { + "External id": 975157,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936925614.249, "dur": 0.926, + "args": { + "External id": 975158,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2338706, + "ts": 6345936925643.374, "dur": 30.731, + "args": { + "External id": 975159,"Sequence number": 10552329, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14203 + } + }, + { + "ph": "s", "id": 137, "pid": 2338706, "tid": 2338706, "ts": 6345936925643.374, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936925715.830, "dur": 212.206, + "args": { + "External id": 975160,"Sequence number": 10552330, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14204 + } + }, + { + "ph": "s", "id": 136, "pid": 2338706, "tid": 2338706, "ts": 6345936925715.830, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936925739.965, "dur": 3.853, + "args": { + "External id": 975161,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936925741.531, "dur": 2.122, + "args": { + "External id": 975162,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936925752.787, "dur": 8.560, + "args": { + "External id": 975163,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936925755.946, "dur": 4.965, + "args": { + "External id": 975164,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936925768.387, "dur": 4.618, + "args": { + "External id": 975165,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936925911.581, "dur": 3.542, + "args": { + "External id": 975166,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936925913.189, "dur": 1.740, + "args": { + "External id": 975167,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936925949.601, "dur": 165.963, + "args": { + "External id": 975168,"Sequence number": 10552331, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936925952.689, "dur": 8.063, + "args": { + "External id": 975169,"Sequence number": 10552331, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14213 + } + }, + { + "ph": "s", "id": 135, "pid": 2338706, "tid": 2338706, "ts": 6345936925952.689, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936925954.975, "dur": 4.217, + "args": { + "External id": 975170,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936925957.829, "dur": 1.150, + "args": { + "External id": 975171,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936925961.738, "dur": 153.332, + "args": { + "External id": 975172,"Sequence number": 10552332, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936925963.329, "dur": 7.953, + "args": { + "External id": 975173,"Sequence number": 10552332, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936925966.122, "dur": 4.983, + "args": { + "External id": 975174,"Sequence number": 10552332, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14218 + } + }, + { + "ph": "s", "id": 134, "pid": 2338706, "tid": 2338706, "ts": 6345936925966.122, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936925972.199, "dur": 133.475, + "args": { + "External id": 975175,"Sequence number": 10552333, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14219 + } + }, + { + "ph": "s", "id": 133, "pid": 2338706, "tid": 2338706, "ts": 6345936925972.199, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936926110.184, "dur": 3.875, + "args": { + "External id": 975176,"Sequence number": 10552334, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14220 + } + }, + { + "ph": "s", "id": 132, "pid": 2338706, "tid": 2338706, "ts": 6345936926110.184, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936926128.188, "dur": 87.167, + "args": { + "External id": 975177,"Sequence number": 10552335, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936926129.108, "dur": 10.299, + "args": { + "External id": 975178,"Sequence number": 10552335, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14222 + } + }, + { + "ph": "s", "id": 131, "pid": 2338706, "tid": 2338706, "ts": 6345936926129.108, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936926134.235, "dur": 3.798, + "args": { + "External id": 975179,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936926136.759, "dur": 1.082, + "args": { + "External id": 975180,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936926140.245, "dur": 74.789, + "args": { + "External id": 975181,"Sequence number": 10552336, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936926141.877, "dur": 5.822, + "args": { + "External id": 975182,"Sequence number": 10552336, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936926145.010, "dur": 2.562, + "args": { + "External id": 975183,"Sequence number": 10552336, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14227 + } + }, + { + "ph": "s", "id": 130, "pid": 2338706, "tid": 2338706, "ts": 6345936926145.010, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936926148.493, "dur": 56.733, + "args": { + "External id": 975184,"Sequence number": 10552337, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14228 + } + }, + { + "ph": "s", "id": 129, "pid": 2338706, "tid": 2338706, "ts": 6345936926148.493, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936926207.450, "dur": 6.728, + "args": { + "External id": 975185,"Sequence number": 10552338, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14229 + } + }, + { + "ph": "s", "id": 128, "pid": 2338706, "tid": 2338706, "ts": 6345936926207.450, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936926224.566, "dur": 69.464, + "args": { + "External id": 975186,"Sequence number": 10552339, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936926225.076, "dur": 9.560, + "args": { + "External id": 975187,"Sequence number": 10552339, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14231 + } + }, + { + "ph": "s", "id": 127, "pid": 2338706, "tid": 2338706, "ts": 6345936926225.076, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936926226.901, "dur": 6.470, + "args": { + "External id": 975188,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936926231.074, "dur": 2.071, + "args": { + "External id": 975189,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936926235.334, "dur": 58.400, + "args": { + "External id": 975190,"Sequence number": 10552340, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936926236.513, "dur": 6.857, + "args": { + "External id": 975191,"Sequence number": 10552340, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936926237.520, "dur": 5.697, + "args": { + "External id": 975192,"Sequence number": 10552340, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14236 + } + }, + { + "ph": "s", "id": 126, "pid": 2338706, "tid": 2338706, "ts": 6345936926237.520, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936926247.080, "dur": 40.759, + "args": { + "External id": 975193,"Sequence number": 10552341, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14237 + } + }, + { + "ph": "s", "id": 125, "pid": 2338706, "tid": 2338706, "ts": 6345936926247.080, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936926289.688, "dur": 3.635, + "args": { + "External id": 975194,"Sequence number": 10552342, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14238 + } + }, + { + "ph": "s", "id": 124, "pid": 2338706, "tid": 2338706, "ts": 6345936926289.688, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936926313.888, "dur": 4.625, + "args": { + "External id": 975195,"Sequence number": 10552343, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936926315.420, "dur": 2.899, + "args": { + "External id": 975196,"Sequence number": 10552343, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14240 + } + }, + { + "ph": "s", "id": 123, "pid": 2338706, "tid": 2338706, "ts": 6345936926315.420, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936926327.799, "dur": 5.765, + "args": { + "External id": 975197,"Sequence number": 10552344, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936926331.418, "dur": 2.011, + "args": { + "External id": 975198,"Sequence number": 10552344, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14242 + } + }, + { + "ph": "s", "id": 122, "pid": 2338706, "tid": 2338706, "ts": 6345936926331.418, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936926339.330, "dur": 3.193, + "args": { + "External id": 975199,"Sequence number": 10552345, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936926340.394, "dur": 2.002, + "args": { + "External id": 975200,"Sequence number": 10552345, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14244 + } + }, + { + "ph": "s", "id": 121, "pid": 2338706, "tid": 2338706, "ts": 6345936926340.394, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936926377.956, "dur": 178.735, + "args": { + "External id": 975201,"Sequence number": 10552346, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14245 + } + }, + { + "ph": "s", "id": 120, "pid": 2338706, "tid": 2338706, "ts": 6345936926377.956, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936926400.906, "dur": 11.107, + "args": { + "External id": 975202,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936926404.611, "dur": 6.935, + "args": { + "External id": 975203,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936926574.108, "dur": 120.737, + "args": { + "External id": 975204,"Sequence number": 10552347, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14248 + } + }, + { + "ph": "s", "id": 119, "pid": 2338706, "tid": 2338706, "ts": 6345936926574.108, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936926588.858, "dur": 8.046, + "args": { + "External id": 975205,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936926591.787, "dur": 4.633, + "args": { + "External id": 975206,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338706, "tid": 2338706, + "ts": 6345936926725.808, "dur": 196.944, + "args": { + "External id": 975207,"Sequence number": 10552348, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14251 + } + }, + { + "ph": "s", "id": 118, "pid": 2338706, "tid": 2338706, "ts": 6345936926725.808, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936926761.371, "dur": 135.290, + "args": { + "External id": 975208,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936926812.099, "dur": 8.112, + "args": { + "External id": 975209,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936926815.711, "dur": 3.889, + "args": { + "External id": 975210,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936926823.446, "dur": 4.395, + "args": { + "External id": 975211,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936926829.480, "dur": 1.715, + "args": { + "External id": 975212,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936926834.157, "dur": 4.139, + "args": { + "External id": 975213,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2338706, + "ts": 6345936926908.757, "dur": 4.770, + "args": { + "External id": 975214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936926928.243, "dur": 8.083, + "args": { + "External id": 975215,"Sequence number": 10552349, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936926930.130, "dur": 5.947, + "args": { + "External id": 975216,"Sequence number": 10552349, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14260 + } + }, + { + "ph": "s", "id": 117, "pid": 2338706, "tid": 2338706, "ts": 6345936926930.130, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936926949.012, "dur": 176.973, + "args": { + "External id": 975217,"Sequence number": 10552350, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936926950.058, "dur": 8.190, + "args": { + "External id": 975218,"Sequence number": 10552350, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14262 + } + }, + { + "ph": "s", "id": 116, "pid": 2338706, "tid": 2338706, "ts": 6345936926950.058, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936926952.586, "dur": 4.458, + "args": { + "External id": 975219,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936926955.282, "dur": 1.482, + "args": { + "External id": 975220,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936926959.310, "dur": 166.263, + "args": { + "External id": 975221,"Sequence number": 10552351, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936926963.685, "dur": 3.299, + "args": { + "External id": 975222,"Sequence number": 10552351, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936926964.596, "dur": 2.248, + "args": { + "External id": 975223,"Sequence number": 10552351, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14267 + } + }, + { + "ph": "s", "id": 115, "pid": 2338706, "tid": 2338706, "ts": 6345936926964.596, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936926968.320, "dur": 143.287, + "args": { + "External id": 975224,"Sequence number": 10552352, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14268 + } + }, + { + "ph": "s", "id": 114, "pid": 2338706, "tid": 2338706, "ts": 6345936926968.320, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936927116.654, "dur": 7.876, + "args": { + "External id": 975225,"Sequence number": 10552353, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14269 + } + }, + { + "ph": "s", "id": 113, "pid": 2338706, "tid": 2338706, "ts": 6345936927116.654, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936927186.778, "dur": 272.313, + "args": { + "External id": 975226,"Sequence number": 10552354, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14270 + } + }, + { + "ph": "s", "id": 112, "pid": 2338706, "tid": 2338706, "ts": 6345936927186.778, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936927211.888, "dur": 3.575, + "args": { + "External id": 975227,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936927213.155, "dur": 2.133, + "args": { + "External id": 975228,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936927220.330, "dur": 6.269, + "args": { + "External id": 975229,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936927224.433, "dur": 2.043, + "args": { + "External id": 975230,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936927225.300, "dur": 1.022, + "args": { + "External id": 975231,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936927235.853, "dur": 9.472, + "args": { + "External id": 975232,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936927238.569, "dur": 6.375, + "args": { + "External id": 975233,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936927252.642, "dur": 3.654, + "args": { + "External id": 975234,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936927260.554, "dur": 7.202, + "args": { + "External id": 975235,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936927433.741, "dur": 3.632, + "args": { + "External id": 975236,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936927434.865, "dur": 2.238, + "args": { + "External id": 975237,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936927440.840, "dur": 2.660, + "args": { + "External id": 975238,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936927442.244, "dur": 1.088, + "args": { + "External id": 975239,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936927479.755, "dur": 127.285, + "args": { + "External id": 975240,"Sequence number": 10552355, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936927481.406, "dur": 13.198, + "args": { + "External id": 975241,"Sequence number": 10552355, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14285 + } + }, + { + "ph": "s", "id": 111, "pid": 2338706, "tid": 2338706, "ts": 6345936927481.406, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936927484.620, "dur": 8.735, + "args": { + "External id": 975242,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936927489.996, "dur": 3.016, + "args": { + "External id": 975243,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936927495.792, "dur": 110.873, + "args": { + "External id": 975244,"Sequence number": 10552356, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936927497.444, "dur": 6.276, + "args": { + "External id": 975245,"Sequence number": 10552356, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936927498.728, "dur": 4.848, + "args": { + "External id": 975246,"Sequence number": 10552356, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14290 + } + }, + { + "ph": "s", "id": 110, "pid": 2338706, "tid": 2338706, "ts": 6345936927498.728, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936927507.010, "dur": 91.597, + "args": { + "External id": 975247,"Sequence number": 10552357, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14291 + } + }, + { + "ph": "s", "id": 109, "pid": 2338706, "tid": 2338706, "ts": 6345936927507.010, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936927602.041, "dur": 3.844, + "args": { + "External id": 975248,"Sequence number": 10552358, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14292 + } + }, + { + "ph": "s", "id": 108, "pid": 2338706, "tid": 2338706, "ts": 6345936927602.041, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936927617.197, "dur": 97.629, + "args": { + "External id": 975249,"Sequence number": 10552359, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936927618.053, "dur": 8.598, + "args": { + "External id": 975250,"Sequence number": 10552359, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14294 + } + }, + { + "ph": "s", "id": 107, "pid": 2338706, "tid": 2338706, "ts": 6345936927618.053, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936927619.577, "dur": 5.518, + "args": { + "External id": 975251,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936927624.020, "dur": 0.850, + "args": { + "External id": 975252,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936927635.557, "dur": 78.973, + "args": { + "External id": 975253,"Sequence number": 10552360, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936927636.779, "dur": 6.083, + "args": { + "External id": 975254,"Sequence number": 10552360, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936927637.730, "dur": 4.940, + "args": { + "External id": 975255,"Sequence number": 10552360, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14299 + } + }, + { + "ph": "s", "id": 106, "pid": 2338706, "tid": 2338706, "ts": 6345936927637.730, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936927643.715, "dur": 63.895, + "args": { + "External id": 975256,"Sequence number": 10552361, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14300 + } + }, + { + "ph": "s", "id": 105, "pid": 2338706, "tid": 2338706, "ts": 6345936927643.715, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936927710.056, "dur": 3.940, + "args": { + "External id": 975257,"Sequence number": 10552362, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14301 + } + }, + { + "ph": "s", "id": 104, "pid": 2338706, "tid": 2338706, "ts": 6345936927710.056, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936927736.835, "dur": 179.459, + "args": { + "External id": 975258,"Sequence number": 10552363, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14302 + } + }, + { + "ph": "s", "id": 103, "pid": 2338706, "tid": 2338706, "ts": 6345936927736.835, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936927779.546, "dur": 5.611, + "args": { + "External id": 975259,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936927832.066, "dur": 68.454, + "args": { + "External id": 975260,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936927832.921, "dur": 5.634, + "args": { + "External id": 975261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936927834.182, "dur": 3.378, + "args": { + "External id": 975262,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936927836.456, "dur": 0.849, + "args": { + "External id": 975263,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936927839.542, "dur": 60.620, + "args": { + "External id": 975264,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936927841.177, "dur": 2.981, + "args": { + "External id": 975265,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936927842.658, "dur": 1.397, + "args": { + "External id": 975266,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936927844.760, "dur": 51.125, + "args": { + "External id": 975267,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936927898.315, "dur": 1.245, + "args": { + "External id": 975268,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2338706, + "ts": 6345936927926.218, "dur": 31.248, + "args": { + "External id": 975269,"Sequence number": 10552364, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14313 + } + }, + { + "ph": "s", "id": 102, "pid": 2338706, "tid": 2338706, "ts": 6345936927926.218, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936928001.538, "dur": 302.182, + "args": { + "External id": 975270,"Sequence number": 10552365, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14314 + } + }, + { + "ph": "s", "id": 101, "pid": 2338706, "tid": 2338706, "ts": 6345936928001.538, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936928047.206, "dur": 4.590, + "args": { + "External id": 975271,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936928048.561, "dur": 2.821, + "args": { + "External id": 975272,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936928103.000, "dur": 10.727, + "args": { + "External id": 975273,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936928106.419, "dur": 6.608, + "args": { + "External id": 975274,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936928122.562, "dur": 6.671, + "args": { + "External id": 975275,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936928283.693, "dur": 4.549, + "args": { + "External id": 975276,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936928285.059, "dur": 2.935, + "args": { + "External id": 975277,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936928326.507, "dur": 112.318, + "args": { + "External id": 975278,"Sequence number": 10552366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936928327.811, "dur": 9.881, + "args": { + "External id": 975279,"Sequence number": 10552366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14323 + } + }, + { + "ph": "s", "id": 100, "pid": 2338706, "tid": 2338706, "ts": 6345936928327.811, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936928330.960, "dur": 5.041, + "args": { + "External id": 975280,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936928333.672, "dur": 2.087, + "args": { + "External id": 975281,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936928341.372, "dur": 97.015, + "args": { + "External id": 975282,"Sequence number": 10552367, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936928343.373, "dur": 4.889, + "args": { + "External id": 975283,"Sequence number": 10552367, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936928344.364, "dur": 3.727, + "args": { + "External id": 975284,"Sequence number": 10552367, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14328 + } + }, + { + "ph": "s", "id": 99, "pid": 2338706, "tid": 2338706, "ts": 6345936928344.364, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936928349.188, "dur": 78.752, + "args": { + "External id": 975285,"Sequence number": 10552368, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14329 + } + }, + { + "ph": "s", "id": 98, "pid": 2338706, "tid": 2338706, "ts": 6345936928349.188, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936928430.890, "dur": 6.703, + "args": { + "External id": 975286,"Sequence number": 10552369, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14330 + } + }, + { + "ph": "s", "id": 97, "pid": 2338706, "tid": 2338706, "ts": 6345936928430.890, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936928448.684, "dur": 69.467, + "args": { + "External id": 975287,"Sequence number": 10552370, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936928449.423, "dur": 6.202, + "args": { + "External id": 975288,"Sequence number": 10552370, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14332 + } + }, + { + "ph": "s", "id": 96, "pid": 2338706, "tid": 2338706, "ts": 6345936928449.423, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936928451.172, "dur": 3.121, + "args": { + "External id": 975289,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936928453.444, "dur": 0.649, + "args": { + "External id": 975290,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936928456.311, "dur": 61.583, + "args": { + "External id": 975291,"Sequence number": 10552371, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936928460.021, "dur": 3.811, + "args": { + "External id": 975292,"Sequence number": 10552371, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936928461.175, "dur": 2.484, + "args": { + "External id": 975293,"Sequence number": 10552371, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14337 + } + }, + { + "ph": "s", "id": 95, "pid": 2338706, "tid": 2338706, "ts": 6345936928461.175, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936928464.369, "dur": 47.991, + "args": { + "External id": 975294,"Sequence number": 10552372, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14338 + } + }, + { + "ph": "s", "id": 94, "pid": 2338706, "tid": 2338706, "ts": 6345936928464.369, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936928515.152, "dur": 2.340, + "args": { + "External id": 975295,"Sequence number": 10552373, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14339 + } + }, + { + "ph": "s", "id": 93, "pid": 2338706, "tid": 2338706, "ts": 6345936928515.152, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936928525.841, "dur": 69.549, + "args": { + "External id": 975296,"Sequence number": 10552374, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936928528.971, "dur": 5.705, + "args": { + "External id": 975297,"Sequence number": 10552374, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14341 + } + }, + { + "ph": "s", "id": 92, "pid": 2338706, "tid": 2338706, "ts": 6345936928528.971, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936928530.452, "dur": 2.813, + "args": { + "External id": 975298,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936928532.607, "dur": 0.504, + "args": { + "External id": 975299,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936928535.418, "dur": 59.737, + "args": { + "External id": 975300,"Sequence number": 10552375, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936928536.600, "dur": 8.100, + "args": { + "External id": 975301,"Sequence number": 10552375, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936928539.706, "dur": 4.838, + "args": { + "External id": 975302,"Sequence number": 10552375, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14346 + } + }, + { + "ph": "s", "id": 91, "pid": 2338706, "tid": 2338706, "ts": 6345936928539.706, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936928545.252, "dur": 41.415, + "args": { + "External id": 975303,"Sequence number": 10552376, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14347 + } + }, + { + "ph": "s", "id": 90, "pid": 2338706, "tid": 2338706, "ts": 6345936928545.252, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936928588.811, "dur": 5.897, + "args": { + "External id": 975304,"Sequence number": 10552377, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14348 + } + }, + { + "ph": "s", "id": 89, "pid": 2338706, "tid": 2338706, "ts": 6345936928588.811, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936928613.495, "dur": 6.560, + "args": { + "External id": 975305,"Sequence number": 10552378, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936928616.943, "dur": 2.927, + "args": { + "External id": 975306,"Sequence number": 10552378, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14350 + } + }, + { + "ph": "s", "id": 88, "pid": 2338706, "tid": 2338706, "ts": 6345936928616.943, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936928628.926, "dur": 3.477, + "args": { + "External id": 975307,"Sequence number": 10552379, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936928630.375, "dur": 1.892, + "args": { + "External id": 975308,"Sequence number": 10552379, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14352 + } + }, + { + "ph": "s", "id": 87, "pid": 2338706, "tid": 2338706, "ts": 6345936928630.375, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936928637.706, "dur": 5.580, + "args": { + "External id": 975309,"Sequence number": 10552380, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936928639.246, "dur": 3.902, + "args": { + "External id": 975310,"Sequence number": 10552380, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14354 + } + }, + { + "ph": "s", "id": 86, "pid": 2338706, "tid": 2338706, "ts": 6345936928639.246, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936928677.074, "dur": 173.895, + "args": { + "External id": 975311,"Sequence number": 10552381, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14355 + } + }, + { + "ph": "s", "id": 85, "pid": 2338706, "tid": 2338706, "ts": 6345936928677.074, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936928699.531, "dur": 9.650, + "args": { + "External id": 975312,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936928702.877, "dur": 5.810, + "args": { + "External id": 975313,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936928864.915, "dur": 124.055, + "args": { + "External id": 975314,"Sequence number": 10552382, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14358 + } + }, + { + "ph": "s", "id": 84, "pid": 2338706, "tid": 2338706, "ts": 6345936928864.915, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936928880.568, "dur": 8.330, + "args": { + "External id": 975315,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936928884.187, "dur": 4.215, + "args": { + "External id": 975316,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338706, "tid": 2338706, + "ts": 6345936929045.608, "dur": 267.807, + "args": { + "External id": 975317,"Sequence number": 10552383, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14361 + } + }, + { + "ph": "s", "id": 83, "pid": 2338706, "tid": 2338706, "ts": 6345936929045.608, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936929119.711, "dur": 162.146, + "args": { + "External id": 975318,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936929176.800, "dur": 11.924, + "args": { + "External id": 975319,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936929181.297, "dur": 6.586, + "args": { + "External id": 975320,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936929192.146, "dur": 7.393, + "args": { + "External id": 975321,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936929201.141, "dur": 1.535, + "args": { + "External id": 975322,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936929208.449, "dur": 4.714, + "args": { + "External id": 975323,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2338706, + "ts": 6345936929295.241, "dur": 5.813, + "args": { + "External id": 975324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936929319.646, "dur": 7.425, + "args": { + "External id": 975325,"Sequence number": 10552384, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936929321.818, "dur": 5.095, + "args": { + "External id": 975326,"Sequence number": 10552384, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14370 + } + }, + { + "ph": "s", "id": 82, "pid": 2338706, "tid": 2338706, "ts": 6345936929321.818, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936929340.846, "dur": 137.770, + "args": { + "External id": 975327,"Sequence number": 10552385, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936929342.237, "dur": 14.710, + "args": { + "External id": 975328,"Sequence number": 10552385, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14372 + } + }, + { + "ph": "s", "id": 81, "pid": 2338706, "tid": 2338706, "ts": 6345936929342.237, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936929348.071, "dur": 7.580, + "args": { + "External id": 975329,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936929353.301, "dur": 2.007, + "args": { + "External id": 975330,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936929358.269, "dur": 119.943, + "args": { + "External id": 975331,"Sequence number": 10552386, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936929360.787, "dur": 6.019, + "args": { + "External id": 975332,"Sequence number": 10552386, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936929361.894, "dur": 4.701, + "args": { + "External id": 975333,"Sequence number": 10552386, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14377 + } + }, + { + "ph": "s", "id": 80, "pid": 2338706, "tid": 2338706, "ts": 6345936929361.894, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936929370.533, "dur": 96.794, + "args": { + "External id": 975334,"Sequence number": 10552387, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14378 + } + }, + { + "ph": "s", "id": 79, "pid": 2338706, "tid": 2338706, "ts": 6345936929370.533, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936929470.554, "dur": 6.629, + "args": { + "External id": 975335,"Sequence number": 10552388, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14379 + } + }, + { + "ph": "s", "id": 78, "pid": 2338706, "tid": 2338706, "ts": 6345936929470.554, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936929517.358, "dur": 335.626, + "args": { + "External id": 975336,"Sequence number": 10552389, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14380 + } + }, + { + "ph": "s", "id": 77, "pid": 2338706, "tid": 2338706, "ts": 6345936929517.358, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936929537.829, "dur": 4.997, + "args": { + "External id": 975337,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936929541.116, "dur": 1.453, + "args": { + "External id": 975338,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936929547.366, "dur": 4.016, + "args": { + "External id": 975339,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936929548.762, "dur": 2.486, + "args": { + "External id": 975340,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936929550.206, "dur": 0.925, + "args": { + "External id": 975341,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936929559.780, "dur": 10.458, + "args": { + "External id": 975342,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936929562.471, "dur": 7.432, + "args": { + "External id": 975343,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936929579.821, "dur": 2.990, + "args": { + "External id": 975344,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936929586.623, "dur": 3.604, + "args": { + "External id": 975345,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936929821.583, "dur": 5.889, + "args": { + "External id": 975346,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936929823.257, "dur": 3.802, + "args": { + "External id": 975347,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936929831.250, "dur": 2.721, + "args": { + "External id": 975348,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936929832.654, "dur": 1.217, + "args": { + "External id": 975349,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936929874.276, "dur": 162.617, + "args": { + "External id": 975350,"Sequence number": 10552390, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936929875.364, "dur": 23.712, + "args": { + "External id": 975351,"Sequence number": 10552390, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14395 + } + }, + { + "ph": "s", "id": 76, "pid": 2338706, "tid": 2338706, "ts": 6345936929875.364, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936929880.820, "dur": 16.745, + "args": { + "External id": 975352,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936929895.110, "dur": 2.004, + "args": { + "External id": 975353,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936929900.530, "dur": 135.967, + "args": { + "External id": 975354,"Sequence number": 10552391, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936929902.665, "dur": 7.722, + "args": { + "External id": 975355,"Sequence number": 10552391, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936929906.045, "dur": 4.170, + "args": { + "External id": 975356,"Sequence number": 10552391, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14400 + } + }, + { + "ph": "s", "id": 75, "pid": 2338706, "tid": 2338706, "ts": 6345936929906.045, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936929911.453, "dur": 113.351, + "args": { + "External id": 975357,"Sequence number": 10552392, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14401 + } + }, + { + "ph": "s", "id": 74, "pid": 2338706, "tid": 2338706, "ts": 6345936929911.453, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936930029.595, "dur": 5.936, + "args": { + "External id": 975358,"Sequence number": 10552393, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14402 + } + }, + { + "ph": "s", "id": 73, "pid": 2338706, "tid": 2338706, "ts": 6345936930029.595, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936930049.563, "dur": 212.782, + "args": { + "External id": 975359,"Sequence number": 10552394, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936930050.516, "dur": 123.500, + "args": { + "External id": 975360,"Sequence number": 10552394, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14404 + } + }, + { + "ph": "s", "id": 72, "pid": 2338706, "tid": 2338706, "ts": 6345936930050.516, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936930162.993, "dur": 8.478, + "args": { + "External id": 975361,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936930167.329, "dur": 3.581, + "args": { + "External id": 975362,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936930175.267, "dur": 86.816, + "args": { + "External id": 975363,"Sequence number": 10552395, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936930176.521, "dur": 8.465, + "args": { + "External id": 975364,"Sequence number": 10552395, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936930177.388, "dur": 7.413, + "args": { + "External id": 975365,"Sequence number": 10552395, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14409 + } + }, + { + "ph": "s", "id": 71, "pid": 2338706, "tid": 2338706, "ts": 6345936930177.388, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936930185.681, "dur": 68.929, + "args": { + "External id": 975366,"Sequence number": 10552396, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14410 + } + }, + { + "ph": "s", "id": 70, "pid": 2338706, "tid": 2338706, "ts": 6345936930185.681, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936930256.869, "dur": 3.862, + "args": { + "External id": 975367,"Sequence number": 10552397, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14411 + } + }, + { + "ph": "s", "id": 69, "pid": 2338706, "tid": 2338706, "ts": 6345936930256.869, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936930295.438, "dur": 188.078, + "args": { + "External id": 975368,"Sequence number": 10552398, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14412 + } + }, + { + "ph": "s", "id": 68, "pid": 2338706, "tid": 2338706, "ts": 6345936930295.438, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936930348.348, "dur": 5.598, + "args": { + "External id": 975369,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936930394.056, "dur": 72.571, + "args": { + "External id": 975370,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936930394.907, "dur": 5.654, + "args": { + "External id": 975371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936930396.572, "dur": 2.900, + "args": { + "External id": 975372,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936930398.207, "dur": 0.948, + "args": { + "External id": 975373,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936930401.423, "dur": 64.852, + "args": { + "External id": 975374,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936930402.827, "dur": 4.396, + "args": { + "External id": 975375,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936930405.794, "dur": 1.265, + "args": { + "External id": 975376,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936930407.908, "dur": 52.078, + "args": { + "External id": 975377,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936930461.952, "dur": 3.572, + "args": { + "External id": 975378,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2338706, + "ts": 6345936930495.119, "dur": 29.042, + "args": { + "External id": 975379,"Sequence number": 10552399, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14423 + } + }, + { + "ph": "s", "id": 67, "pid": 2338706, "tid": 2338706, "ts": 6345936930495.119, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936930565.339, "dur": 238.179, + "args": { + "External id": 975380,"Sequence number": 10552400, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14424 + } + }, + { + "ph": "s", "id": 66, "pid": 2338706, "tid": 2338706, "ts": 6345936930565.339, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936930591.696, "dur": 3.382, + "args": { + "External id": 975381,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936930592.867, "dur": 1.964, + "args": { + "External id": 975382,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936930605.206, "dur": 11.533, + "args": { + "External id": 975383,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936930611.314, "dur": 4.904, + "args": { + "External id": 975384,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936930625.194, "dur": 4.998, + "args": { + "External id": 975385,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936930786.480, "dur": 3.326, + "args": { + "External id": 975386,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936930787.575, "dur": 1.951, + "args": { + "External id": 975387,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936930823.115, "dur": 98.509, + "args": { + "External id": 975388,"Sequence number": 10552401, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936930823.847, "dur": 10.029, + "args": { + "External id": 975389,"Sequence number": 10552401, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14433 + } + }, + { + "ph": "s", "id": 65, "pid": 2338706, "tid": 2338706, "ts": 6345936930823.847, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936930825.972, "dur": 6.331, + "args": { + "External id": 975390,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936930830.821, "dur": 1.272, + "args": { + "External id": 975391,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936930835.006, "dur": 86.338, + "args": { + "External id": 975392,"Sequence number": 10552402, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936930836.601, "dur": 4.113, + "args": { + "External id": 975393,"Sequence number": 10552402, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936930837.727, "dur": 2.827, + "args": { + "External id": 975394,"Sequence number": 10552402, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14438 + } + }, + { + "ph": "s", "id": 64, "pid": 2338706, "tid": 2338706, "ts": 6345936930837.727, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936930843.844, "dur": 70.189, + "args": { + "External id": 975395,"Sequence number": 10552403, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14439 + } + }, + { + "ph": "s", "id": 63, "pid": 2338706, "tid": 2338706, "ts": 6345936930843.844, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936930916.545, "dur": 4.141, + "args": { + "External id": 975396,"Sequence number": 10552404, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14440 + } + }, + { + "ph": "s", "id": 62, "pid": 2338706, "tid": 2338706, "ts": 6345936930916.545, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936930935.636, "dur": 67.905, + "args": { + "External id": 975397,"Sequence number": 10552405, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936930936.112, "dur": 6.951, + "args": { + "External id": 975398,"Sequence number": 10552405, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14442 + } + }, + { + "ph": "s", "id": 61, "pid": 2338706, "tid": 2338706, "ts": 6345936930936.112, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936930937.267, "dur": 4.625, + "args": { + "External id": 975399,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936930941.053, "dur": 0.652, + "args": { + "External id": 975400,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936930943.735, "dur": 59.586, + "args": { + "External id": 975401,"Sequence number": 10552406, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936930944.706, "dur": 5.008, + "args": { + "External id": 975402,"Sequence number": 10552406, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936930945.532, "dur": 3.992, + "args": { + "External id": 975403,"Sequence number": 10552406, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14447 + } + }, + { + "ph": "s", "id": 60, "pid": 2338706, "tid": 2338706, "ts": 6345936930945.532, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936930950.339, "dur": 46.628, + "args": { + "External id": 975404,"Sequence number": 10552407, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14448 + } + }, + { + "ph": "s", "id": 59, "pid": 2338706, "tid": 2338706, "ts": 6345936930950.339, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936930998.928, "dur": 3.707, + "args": { + "External id": 975405,"Sequence number": 10552408, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14449 + } + }, + { + "ph": "s", "id": 58, "pid": 2338706, "tid": 2338706, "ts": 6345936930998.928, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936931032.795, "dur": 120.632, + "args": { + "External id": 975406,"Sequence number": 10552409, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936931033.473, "dur": 8.434, + "args": { + "External id": 975407,"Sequence number": 10552409, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14451 + } + }, + { + "ph": "s", "id": 57, "pid": 2338706, "tid": 2338706, "ts": 6345936931033.473, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936931035.333, "dur": 4.984, + "args": { + "External id": 975408,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936931038.967, "dur": 0.988, + "args": { + "External id": 975409,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936931042.953, "dur": 110.114, + "args": { + "External id": 975410,"Sequence number": 10552410, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936931046.696, "dur": 4.532, + "args": { + "External id": 975411,"Sequence number": 10552410, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936931047.545, "dur": 3.522, + "args": { + "External id": 975412,"Sequence number": 10552410, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14456 + } + }, + { + "ph": "s", "id": 56, "pid": 2338706, "tid": 2338706, "ts": 6345936931047.545, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936931052.005, "dur": 92.688, + "args": { + "External id": 975413,"Sequence number": 10552411, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14457 + } + }, + { + "ph": "s", "id": 55, "pid": 2338706, "tid": 2338706, "ts": 6345936931052.005, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936931148.147, "dur": 4.269, + "args": { + "External id": 975414,"Sequence number": 10552412, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14458 + } + }, + { + "ph": "s", "id": 54, "pid": 2338706, "tid": 2338706, "ts": 6345936931148.147, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936931177.601, "dur": 4.800, + "args": { + "External id": 975415,"Sequence number": 10552413, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936931178.492, "dur": 3.701, + "args": { + "External id": 975416,"Sequence number": 10552413, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14460 + } + }, + { + "ph": "s", "id": 53, "pid": 2338706, "tid": 2338706, "ts": 6345936931178.492, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936931190.952, "dur": 4.700, + "args": { + "External id": 975417,"Sequence number": 10552414, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936931192.054, "dur": 3.463, + "args": { + "External id": 975418,"Sequence number": 10552414, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14462 + } + }, + { + "ph": "s", "id": 52, "pid": 2338706, "tid": 2338706, "ts": 6345936931192.054, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936931200.541, "dur": 5.575, + "args": { + "External id": 975419,"Sequence number": 10552415, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936931204.070, "dur": 1.906, + "args": { + "External id": 975420,"Sequence number": 10552415, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14464 + } + }, + { + "ph": "s", "id": 51, "pid": 2338706, "tid": 2338706, "ts": 6345936931204.070, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936931242.928, "dur": 190.054, + "args": { + "External id": 975421,"Sequence number": 10552416, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14465 + } + }, + { + "ph": "s", "id": 50, "pid": 2338706, "tid": 2338706, "ts": 6345936931242.928, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936931266.174, "dur": 10.142, + "args": { + "External id": 975422,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936931269.561, "dur": 6.202, + "args": { + "External id": 975423,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936931448.126, "dur": 123.324, + "args": { + "External id": 975424,"Sequence number": 10552417, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14468 + } + }, + { + "ph": "s", "id": 49, "pid": 2338706, "tid": 2338706, "ts": 6345936931448.126, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936931463.595, "dur": 7.481, + "args": { + "External id": 975425,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936931466.375, "dur": 4.193, + "args": { + "External id": 975426,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338706, "tid": 2338706, + "ts": 6345936931604.604, "dur": 202.285, + "args": { + "External id": 975427,"Sequence number": 10552418, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14471 + } + }, + { + "ph": "s", "id": 48, "pid": 2338706, "tid": 2338706, "ts": 6345936931604.604, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345936931634.597, "dur": 144.324, + "args": { + "External id": 975428,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936931693.276, "dur": 7.669, + "args": { + "External id": 975429,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936931696.643, "dur": 3.801, + "args": { + "External id": 975430,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936931704.057, "dur": 4.026, + "args": { + "External id": 975431,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936931709.508, "dur": 1.371, + "args": { + "External id": 975432,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936931713.982, "dur": 5.008, + "args": { + "External id": 975433,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2338706, + "ts": 6345936931792.067, "dur": 5.149, + "args": { + "External id": 975434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936931812.432, "dur": 5.517, + "args": { + "External id": 975435,"Sequence number": 10552419, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936931814.014, "dur": 3.767, + "args": { + "External id": 975436,"Sequence number": 10552419, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14480 + } + }, + { + "ph": "s", "id": 47, "pid": 2338706, "tid": 2338706, "ts": 6345936931814.014, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936931831.806, "dur": 108.628, + "args": { + "External id": 975437,"Sequence number": 10552420, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936931833.133, "dur": 10.861, + "args": { + "External id": 975438,"Sequence number": 10552420, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14482 + } + }, + { + "ph": "s", "id": 46, "pid": 2338706, "tid": 2338706, "ts": 6345936931833.133, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936931838.444, "dur": 4.486, + "args": { + "External id": 975439,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936931840.998, "dur": 1.649, + "args": { + "External id": 975440,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936931845.423, "dur": 94.488, + "args": { + "External id": 975441,"Sequence number": 10552421, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936931847.850, "dur": 4.811, + "args": { + "External id": 975442,"Sequence number": 10552421, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936931848.377, "dur": 4.139, + "args": { + "External id": 975443,"Sequence number": 10552421, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14487 + } + }, + { + "ph": "s", "id": 45, "pid": 2338706, "tid": 2338706, "ts": 6345936931848.377, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936931853.722, "dur": 79.230, + "args": { + "External id": 975444,"Sequence number": 10552422, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14488 + } + }, + { + "ph": "s", "id": 44, "pid": 2338706, "tid": 2338706, "ts": 6345936931853.722, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936931935.583, "dur": 3.678, + "args": { + "External id": 975445,"Sequence number": 10552423, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14489 + } + }, + { + "ph": "s", "id": 43, "pid": 2338706, "tid": 2338706, "ts": 6345936931935.583, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936931975.872, "dur": 324.635, + "args": { + "External id": 975446,"Sequence number": 10552424, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14490 + } + }, + { + "ph": "s", "id": 42, "pid": 2338706, "tid": 2338706, "ts": 6345936931975.872, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936931997.277, "dur": 7.339, + "args": { + "External id": 975447,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936932000.509, "dur": 3.975, + "args": { + "External id": 975448,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338706, "tid": 2338706, + "ts": 6345936932030.967, "dur": 4.103, + "args": { + "External id": 975449,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936932032.345, "dur": 2.592, + "args": { + "External id": 975450,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936932033.100, "dur": 1.522, + "args": { + "External id": 975451,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936932045.052, "dur": 8.974, + "args": { + "External id": 975452,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936932047.344, "dur": 6.340, + "args": { + "External id": 975453,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936932103.537, "dur": 4.883, + "args": { + "External id": 975454,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936932113.593, "dur": 4.385, + "args": { + "External id": 975455,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936932269.012, "dur": 6.648, + "args": { + "External id": 975456,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936932270.221, "dur": 5.150, + "args": { + "External id": 975457,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936932278.894, "dur": 2.339, + "args": { + "External id": 975458,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936932280.018, "dur": 1.100, + "args": { + "External id": 975459,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936932321.311, "dur": 108.452, + "args": { + "External id": 975460,"Sequence number": 10552425, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936932322.466, "dur": 8.352, + "args": { + "External id": 975461,"Sequence number": 10552425, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14505 + } + }, + { + "ph": "s", "id": 41, "pid": 2338706, "tid": 2338706, "ts": 6345936932322.466, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936932325.054, "dur": 4.135, + "args": { + "External id": 975462,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936932327.255, "dur": 1.649, + "args": { + "External id": 975463,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936932331.992, "dur": 97.487, + "args": { + "External id": 975464,"Sequence number": 10552426, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936932333.706, "dur": 6.811, + "args": { + "External id": 975465,"Sequence number": 10552426, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936932336.784, "dur": 3.531, + "args": { + "External id": 975466,"Sequence number": 10552426, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14510 + } + }, + { + "ph": "s", "id": 40, "pid": 2338706, "tid": 2338706, "ts": 6345936932336.784, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936932341.394, "dur": 81.910, + "args": { + "External id": 975467,"Sequence number": 10552427, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14511 + } + }, + { + "ph": "s", "id": 39, "pid": 2338706, "tid": 2338706, "ts": 6345936932341.394, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936932425.912, "dur": 2.872, + "args": { + "External id": 975468,"Sequence number": 10552428, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14512 + } + }, + { + "ph": "s", "id": 38, "pid": 2338706, "tid": 2338706, "ts": 6345936932425.912, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936932438.682, "dur": 78.051, + "args": { + "External id": 975469,"Sequence number": 10552429, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936932439.249, "dur": 11.723, + "args": { + "External id": 975470,"Sequence number": 10552429, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14514 + } + }, + { + "ph": "s", "id": 37, "pid": 2338706, "tid": 2338706, "ts": 6345936932439.249, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936932445.080, "dur": 4.427, + "args": { + "External id": 975471,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936932448.472, "dur": 0.842, + "args": { + "External id": 975472,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936932451.830, "dur": 64.605, + "args": { + "External id": 975473,"Sequence number": 10552430, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936932453.091, "dur": 6.793, + "args": { + "External id": 975474,"Sequence number": 10552430, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936932456.050, "dur": 3.676, + "args": { + "External id": 975475,"Sequence number": 10552430, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14519 + } + }, + { + "ph": "s", "id": 36, "pid": 2338706, "tid": 2338706, "ts": 6345936932456.050, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936932460.624, "dur": 50.328, + "args": { + "External id": 975476,"Sequence number": 10552431, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14520 + } + }, + { + "ph": "s", "id": 35, "pid": 2338706, "tid": 2338706, "ts": 6345936932460.624, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936932512.886, "dur": 3.070, + "args": { + "External id": 975477,"Sequence number": 10552432, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14521 + } + }, + { + "ph": "s", "id": 34, "pid": 2338706, "tid": 2338706, "ts": 6345936932512.886, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936932539.607, "dur": 164.301, + "args": { + "External id": 975478,"Sequence number": 10552433, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14522 + } + }, + { + "ph": "s", "id": 33, "pid": 2338706, "tid": 2338706, "ts": 6345936932539.607, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936932584.813, "dur": 4.479, + "args": { + "External id": 975479,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345936932622.853, "dur": 67.507, + "args": { + "External id": 975480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345936932623.690, "dur": 5.435, + "args": { + "External id": 975481,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936932625.003, "dur": 3.005, + "args": { + "External id": 975482,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936932626.599, "dur": 1.223, + "args": { + "External id": 975483,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345936932629.889, "dur": 60.038, + "args": { + "External id": 975484,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345936932631.295, "dur": 6.190, + "args": { + "External id": 975485,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936932634.079, "dur": 3.287, + "args": { + "External id": 975486,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345936932638.310, "dur": 47.910, + "args": { + "External id": 975487,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345936932688.354, "dur": 0.965, + "args": { + "External id": 975488,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2338706, + "ts": 6345936932714.094, "dur": 30.238, + "args": { + "External id": 975489,"Sequence number": 10552434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14533 + } + }, + { + "ph": "s", "id": 32, "pid": 2338706, "tid": 2338706, "ts": 6345936932714.094, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338706, "tid": 2338706, + "ts": 6345936932765.838, "dur": 45.981, + "args": { + "External id": 975490,"Sequence number": 10552435, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "-2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 14534 + } + }, + { + "ph": "s", "id": 31, "pid": 2338706, "tid": 2338706, "ts": 6345936932765.838, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2338706, "tid": 2338706, + "ts": 6345936932776.480, "dur": 30.612, + "args": { + "External id": 975491,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 14535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936932808.747, "dur": 1.270, + "args": { + "External id": 975492,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 16384], []], "Ev Idx": 14536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345936932850.642, "dur": 51.071, + "args": { + "External id": 975493,"Record function id": 0, "Ev Idx": 14537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 2338706, "tid": 2338706, + "ts": 6345936932902.927, "dur": 301.353, + "args": { + "External id": 975494,"Record function id": 0, "Ev Idx": 14538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936932944.846, "dur": 248.875, + "args": { + "External id": 975495,"Sequence number": 10552436, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [67108864, 16384, 4096, 1]], "Input Dims": [[4096], [8, 4096, 4, 4096]], "Ev Idx": 14539 + } + }, + { + "ph": "s", "id": 30, "pid": 2338706, "tid": 2338706, "ts": 6345936932944.846, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345936933048.938, "dur": 92.691, + "args": { + "External id": 975496,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [131072, 4096], [4096], [131072], [], [], [], [], [], [], [], [], []], "Ev Idx": 14540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2338706, + "ts": 6345936933311.874, "dur": 44.315, + "args": { + "External id": 975497,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936933315.134, "dur": 7.634, + "args": { + "External id": 975498,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345936933325.695, "dur": 30.012, + "args": { + "External id": 975499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 14543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345936933329.068, "dur": 26.000, + "args": { + "External id": 975500,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 14544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2338706, + "ts": 6345936933362.072, "dur": 23.694, + "args": { + "External id": 975501,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936933363.427, "dur": 3.034, + "args": { + "External id": 975502,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345936933369.746, "dur": 15.713, + "args": { + "External id": 975503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 14547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345936933370.508, "dur": 14.480, + "args": { + "External id": 975504,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 14548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2338706, + "ts": 6345936933389.157, "dur": 18.230, + "args": { + "External id": 975505,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936933390.078, "dur": 2.344, + "args": { + "External id": 975506,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345936933393.370, "dur": 13.699, + "args": { + "External id": 975507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 14551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345936933394.197, "dur": 12.351, + "args": { + "External id": 975508,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 14552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936933420.440, "dur": 0.788, + "args": { + "External id": 975509,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 14553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338706, "tid": 2338706, + "ts": 6345936933430.022, "dur": 12.355, + "args": { + "External id": 975510,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "5", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 14554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933438.174, "dur": 2.360, + "args": { + "External id": 975511,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 14555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936933450.141, "dur": 7.544, + "args": { + "External id": 975512,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 14556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933454.514, "dur": 0.814, + "args": { + "External id": 975513,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 14557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936933459.416, "dur": 5.314, + "args": { + "External id": 975514,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 14558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933461.107, "dur": 2.727, + "args": { + "External id": 975515,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 14559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936933466.100, "dur": 3.830, + "args": { + "External id": 975516,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 14560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933468.574, "dur": 0.706, + "args": { + "External id": 975517,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 14561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936933474.287, "dur": 3.541, + "args": { + "External id": 975518,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 14562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933476.225, "dur": 0.879, + "args": { + "External id": 975519,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 14563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936933479.142, "dur": 3.325, + "args": { + "External id": 975520,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 14564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933481.102, "dur": 0.477, + "args": { + "External id": 975521,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 14565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936933483.668, "dur": 3.026, + "args": { + "External id": 975522,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4096, 4], [], [], [], []], "Ev Idx": 14566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933485.514, "dur": 0.493, + "args": { + "External id": 975523,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 14567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345936933491.227, "dur": 6.637, + "args": { + "External id": 975524,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4096, 4], [], []], "Ev Idx": 14568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933496.114, "dur": 0.769, + "args": { + "External id": 975525,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 14569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936933503.087, "dur": 3.476, + "args": { + "External id": 975526,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 14570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933505.508, "dur": 0.347, + "args": { + "External id": 975527,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 14571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2338706, + "ts": 6345936933512.701, "dur": 8.369, + "args": { + "External id": 975528,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 14572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933519.048, "dur": 0.575, + "args": { + "External id": 975529,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 14573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936933522.385, "dur": 5.427, + "args": { + "External id": 975530,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 14574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933524.214, "dur": 2.867, + "args": { + "External id": 975531,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 14575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936933530.841, "dur": 9.329, + "args": { + "External id": 975532,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 14576 + } + }, + { + "ph": "s", "id": 29, "pid": 2338706, "tid": 2338706, "ts": 6345936933530.841, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933537.812, "dur": 0.574, + "args": { + "External id": 975533,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 14577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936933541.564, "dur": 4.884, + "args": { + "External id": 975534,"Sequence number": 10552438, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 14578 + } + }, + { + "ph": "s", "id": 28, "pid": 2338706, "tid": 2338706, "ts": 6345936933541.564, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933545.118, "dur": 0.300, + "args": { + "External id": 975535,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 14579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2338706, + "ts": 6345936933547.634, "dur": 7.829, + "args": { + "External id": 975536,"Sequence number": 10552439, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 14580 + } + }, + { + "ph": "s", "id": 27, "pid": 2338706, "tid": 2338706, "ts": 6345936933547.634, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933553.524, "dur": 0.596, + "args": { + "External id": 975537,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 14581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345936933556.795, "dur": 4.695, + "args": { + "External id": 975538,"Sequence number": 10552440, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 14582 + } + }, + { + "ph": "s", "id": 26, "pid": 2338706, "tid": 2338706, "ts": 6345936933556.795, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933559.767, "dur": 0.768, + "args": { + "External id": 975539,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 14583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345936933566.352, "dur": 42.555, + "args": { + "External id": 975540,"Sequence number": 10552441, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 14584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345936933568.218, "dur": 40.327, + "args": { + "External id": 975541,"Sequence number": 10552441, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 14585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936933571.149, "dur": 8.357, + "args": { + "External id": 975542,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 14586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936933573.277, "dur": 5.359, + "args": { + "External id": 975543,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936933581.329, "dur": 26.653, + "args": { + "External id": 975544,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 14588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936933635.913, "dur": 4.275, + "args": { + "External id": 975545,"Sequence number": 10552441, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14589 + } + }, + { + "ph": "s", "id": 25, "pid": 2338706, "tid": 2338706, "ts": 6345936933635.913, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345936933645.499, "dur": 1.334, + "args": { + "External id": 975546,"Sequence number": 10552442, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 14590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345936933679.200, "dur": 130144.570, + "args": { + "External id": 975547,"Sequence number": 10552442, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 14591 + } + }, + { + "ph": "s", "id": 24, "pid": 2338706, "tid": 2338706, "ts": 6345936933679.200, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345936933697.495, "dur": 33.598, + "args": { + "External id": 975548,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345936933698.391, "dur": 32.449, + "args": { + "External id": 975549,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936933700.319, "dur": 7.643, + "args": { + "External id": 975550,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936933701.594, "dur": 5.881, + "args": { + "External id": 975551,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936933708.854, "dur": 21.433, + "args": { + "External id": 975552,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 14596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936933752.122, "dur": 29.284, + "args": { + "External id": 975553,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936933753.858, "dur": 6.352, + "args": { + "External id": 975554,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933756.136, "dur": 3.665, + "args": { + "External id": 975555,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345936933761.175, "dur": 19.983, + "args": { + "External id": 975556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 14600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345936933764.018, "dur": 16.648, + "args": { + "External id": 975557,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936933785.915, "dur": 21.999, + "args": { + "External id": 975558,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 14602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345936933786.615, "dur": 4.731, + "args": { + "External id": 975559,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 14603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933787.789, "dur": 3.221, + "args": { + "External id": 975560,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345936933791.943, "dur": 15.699, + "args": { + "External id": 975561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345936933792.644, "dur": 14.443, + "args": { + "External id": 975562,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 14606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2338706, + "ts": 6345936933813.073, "dur": 21.054, + "args": { + "External id": 975563,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345936933814.096, "dur": 3.120, + "args": { + "External id": 975564,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345936933818.000, "dur": 15.807, + "args": { + "External id": 975565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 14609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345936933820.842, "dur": 12.499, + "args": { + "External id": 975566,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2338706, + "ts": 6345936933841.682, "dur": 30.999, + "args": { + "External id": 975567,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345936933877.355, "dur": 67.278, + "args": { + "External id": 975568,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345936933880.684, "dur": 63.459, + "args": { + "External id": 975569,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933888.570, "dur": 0.842, + "args": { + "External id": 975570,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 14614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345936933890.929, "dur": 28.697, + "args": { + "External id": 975571,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345936933894.734, "dur": 24.629, + "args": { + "External id": 975572,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 14616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345936933898.090, "dur": 3.025, + "args": { + "External id": 975573,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345936933902.122, "dur": 16.644, + "args": { + "External id": 975574,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 14618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2338706, + "ts": 6345936933952.303, "dur": 122943.491, + "args": { + "External id": 975575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 14619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2338706, + "ts": 6345936933953.949, "dur": 122939.573, + "args": { + "External id": 975576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 14620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937056919.459, "dur": 13.659, + "args": { + "External id": 975577,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937056927.350, "dur": 2.013, + "args": { + "External id": 975578,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937056940.950, "dur": 216.289, + "args": { + "External id": 975579,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937056943.925, "dur": 7.912, + "args": { + "External id": 975580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937056946.646, "dur": 4.173, + "args": { + "External id": 975581,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937056948.421, "dur": 2.077, + "args": { + "External id": 975582,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937056955.984, "dur": 199.888, + "args": { + "External id": 975583,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937056978.925, "dur": 175.001, + "args": { + "External id": 975584,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937057165.709, "dur": 8.730, + "args": { + "External id": 975585,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937057170.217, "dur": 0.870, + "args": { + "External id": 975586,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937057188.556, "dur": 5.824, + "args": { + "External id": 975587,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937057211.356, "dur": 14.103, + "args": { + "External id": 975588,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937057217.821, "dur": 7.297, + "args": { + "External id": 975589,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937057432.540, "dur": 308.298, + "args": { + "External id": 975590,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937057439.301, "dur": 3.857, + "args": { + "External id": 975591,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937057446.086, "dur": 294.120, + "args": { + "External id": 975592,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937057450.250, "dur": 0.848, + "args": { + "External id": 975593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937057455.334, "dur": 41.907, + "args": { + "External id": 975594,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937057499.939, "dur": 7.368, + "args": { + "External id": 975595,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937057506.126, "dur": 0.790, + "args": { + "External id": 975596,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937057510.205, "dur": 36.951, + "args": { + "External id": 975597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937057514.550, "dur": 2.542, + "args": { + "External id": 975598,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937057519.788, "dur": 27.019, + "args": { + "External id": 975599,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937057525.758, "dur": 4.291, + "args": { + "External id": 975600,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937057550.644, "dur": 34.688, + "args": { + "External id": 975601,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937057589.713, "dur": 23.580, + "args": { + "External id": 975602,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937057617.985, "dur": 24.178, + "args": { + "External id": 975603,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937057646.479, "dur": 21.864, + "args": { + "External id": 975604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937057672.304, "dur": 28.817, + "args": { + "External id": 975605,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937057674.806, "dur": 1.501, + "args": { + "External id": 975606,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937057681.147, "dur": 0.700, + "args": { + "External id": 975607,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937057704.101, "dur": 17.680, + "args": { + "External id": 975608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937057723.772, "dur": 15.170, + "args": { + "External id": 975609,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937057749.274, "dur": 2.508, + "args": { + "External id": 975610,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937057760.821, "dur": 5.666, + "args": { + "External id": 975611,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937057764.542, "dur": 0.740, + "args": { + "External id": 975612,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937057870.008, "dur": 90.924, + "args": { + "External id": 975613,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937057976.501, "dur": 6.426, + "args": { + "External id": 975614,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937057980.587, "dur": 0.900, + "args": { + "External id": 975615,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937057984.658, "dur": 57.989, + "args": { + "External id": 975616,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937058089.528, "dur": 10.860, + "args": { + "External id": 975617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937058093.097, "dur": 6.106, + "args": { + "External id": 975618,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937058095.564, "dur": 3.003, + "args": { + "External id": 975619,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937058108.851, "dur": 73.697, + "args": { + "External id": 975620,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937058110.495, "dur": 71.311, + "args": { + "External id": 975621,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937058189.404, "dur": 21.388, + "args": { + "External id": 975622,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937058219.732, "dur": 7.562, + "args": { + "External id": 975623,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937058222.572, "dur": 3.459, + "args": { + "External id": 975624,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937058232.677, "dur": 55.491, + "args": { + "External id": 975625,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937058233.919, "dur": 6.118, + "args": { + "External id": 975626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937058237.233, "dur": 1.909, + "args": { + "External id": 975627,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937058238.470, "dur": 0.504, + "args": { + "External id": 975628,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937058240.831, "dur": 46.802, + "args": { + "External id": 975629,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937058241.784, "dur": 45.148, + "args": { + "External id": 975630,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937058293.422, "dur": 4.385, + "args": { + "External id": 975631,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937058295.607, "dur": 0.702, + "args": { + "External id": 975632,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937058306.540, "dur": 1.960, + "args": { + "External id": 975633,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937058321.609, "dur": 8.064, + "args": { + "External id": 975634,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937058324.097, "dur": 5.275, + "args": { + "External id": 975635,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937058452.335, "dur": 213.245, + "args": { + "External id": 975636,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937058454.702, "dur": 2.439, + "args": { + "External id": 975637,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937058461.276, "dur": 203.691, + "args": { + "External id": 975638,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937058462.797, "dur": 0.452, + "args": { + "External id": 975639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937058467.443, "dur": 24.650, + "args": { + "External id": 975640,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937058493.881, "dur": 4.939, + "args": { + "External id": 975641,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937058496.125, "dur": 2.373, + "args": { + "External id": 975642,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937058500.156, "dur": 25.798, + "args": { + "External id": 975643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937058501.114, "dur": 3.532, + "args": { + "External id": 975644,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937058506.667, "dur": 18.924, + "args": { + "External id": 975645,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937058510.005, "dur": 3.258, + "args": { + "External id": 975646,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937058527.953, "dur": 21.883, + "args": { + "External id": 975647,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937058552.257, "dur": 16.267, + "args": { + "External id": 975648,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937058572.157, "dur": 14.286, + "args": { + "External id": 975649,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937058588.127, "dur": 14.411, + "args": { + "External id": 975650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937058607.118, "dur": 24.720, + "args": { + "External id": 975651,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937058609.480, "dur": 2.615, + "args": { + "External id": 975652,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937058614.117, "dur": 3.001, + "args": { + "External id": 975653,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937058633.777, "dur": 15.412, + "args": { + "External id": 975654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937058650.568, "dur": 13.224, + "args": { + "External id": 975655,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937058673.762, "dur": 2.070, + "args": { + "External id": 975656,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937058686.553, "dur": 4.034, + "args": { + "External id": 975657,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937058688.747, "dur": 0.773, + "args": { + "External id": 975658,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937058779.228, "dur": 62.456, + "args": { + "External id": 975659,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937058847.711, "dur": 5.166, + "args": { + "External id": 975660,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937058850.671, "dur": 0.859, + "args": { + "External id": 975661,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937058854.638, "dur": 29.976, + "args": { + "External id": 975662,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937058890.276, "dur": 8.924, + "args": { + "External id": 975663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937058891.943, "dur": 6.495, + "args": { + "External id": 975664,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937058896.508, "dur": 1.657, + "args": { + "External id": 975665,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937058902.309, "dur": 52.994, + "args": { + "External id": 975666,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937058903.437, "dur": 51.094, + "args": { + "External id": 975667,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937058959.704, "dur": 17.991, + "args": { + "External id": 975668,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937058984.455, "dur": 3.983, + "args": { + "External id": 975669,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937058986.784, "dur": 0.545, + "args": { + "External id": 975670,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937058992.827, "dur": 148.071, + "args": { + "External id": 975671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937058996.053, "dur": 5.733, + "args": { + "External id": 975672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937058996.818, "dur": 4.300, + "args": { + "External id": 975673,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937058998.038, "dur": 2.901, + "args": { + "External id": 975674,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937059002.811, "dur": 137.394, + "args": { + "External id": 975675,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937059003.590, "dur": 135.150, + "args": { + "External id": 975676,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937059148.564, "dur": 5.748, + "args": { + "External id": 975677,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937059151.777, "dur": 0.762, + "args": { + "External id": 975678,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937059164.591, "dur": 2.449, + "args": { + "External id": 975679,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937059177.748, "dur": 8.792, + "args": { + "External id": 975680,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937059179.787, "dur": 6.410, + "args": { + "External id": 975681,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937059301.355, "dur": 216.642, + "args": { + "External id": 975682,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937059305.624, "dur": 2.643, + "args": { + "External id": 975683,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937059310.473, "dur": 206.983, + "args": { + "External id": 975684,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937059314.902, "dur": 0.472, + "args": { + "External id": 975685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937059316.616, "dur": 25.004, + "args": { + "External id": 975686,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937059343.599, "dur": 5.025, + "args": { + "External id": 975687,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937059347.434, "dur": 0.815, + "args": { + "External id": 975688,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937059349.527, "dur": 25.051, + "args": { + "External id": 975689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937059350.795, "dur": 1.751, + "args": { + "External id": 975690,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937059354.642, "dur": 19.505, + "args": { + "External id": 975691,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937059358.638, "dur": 2.599, + "args": { + "External id": 975692,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937059376.338, "dur": 24.800, + "args": { + "External id": 975693,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937059403.151, "dur": 17.037, + "args": { + "External id": 975694,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937059426.312, "dur": 15.846, + "args": { + "External id": 975695,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937059444.016, "dur": 13.021, + "args": { + "External id": 975696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937059459.426, "dur": 22.977, + "args": { + "External id": 975697,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937059461.746, "dur": 1.467, + "args": { + "External id": 975698,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937059465.378, "dur": 0.831, + "args": { + "External id": 975699,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937059483.942, "dur": 14.454, + "args": { + "External id": 975700,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937059502.393, "dur": 13.943, + "args": { + "External id": 975701,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937059525.350, "dur": 2.176, + "args": { + "External id": 975702,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937059537.729, "dur": 4.191, + "args": { + "External id": 975703,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937059540.076, "dur": 0.747, + "args": { + "External id": 975704,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937059620.664, "dur": 70.331, + "args": { + "External id": 975705,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937059696.839, "dur": 5.473, + "args": { + "External id": 975706,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937059700.252, "dur": 0.867, + "args": { + "External id": 975707,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937059704.315, "dur": 31.508, + "args": { + "External id": 975708,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937059743.941, "dur": 6.986, + "args": { + "External id": 975709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937059746.033, "dur": 4.170, + "args": { + "External id": 975710,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937059748.138, "dur": 1.836, + "args": { + "External id": 975711,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937059753.816, "dur": 47.729, + "args": { + "External id": 975712,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937059755.112, "dur": 45.675, + "args": { + "External id": 975713,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937059806.089, "dur": 16.551, + "args": { + "External id": 975714,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937059829.535, "dur": 6.479, + "args": { + "External id": 975715,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937059834.291, "dur": 0.762, + "args": { + "External id": 975716,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937059840.734, "dur": 53.713, + "args": { + "External id": 975717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937059841.744, "dur": 3.364, + "args": { + "External id": 975718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937059842.613, "dur": 1.866, + "args": { + "External id": 975719,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937059843.790, "dur": 0.553, + "args": { + "External id": 975720,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937059845.784, "dur": 48.197, + "args": { + "External id": 975721,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937059849.199, "dur": 44.180, + "args": { + "External id": 975722,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937059899.357, "dur": 4.170, + "args": { + "External id": 975723,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937059901.356, "dur": 0.685, + "args": { + "External id": 975724,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937059909.338, "dur": 2.359, + "args": { + "External id": 975725,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937059920.735, "dur": 8.633, + "args": { + "External id": 975726,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937059922.976, "dur": 6.018, + "args": { + "External id": 975727,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937060089.084, "dur": 209.058, + "args": { + "External id": 975728,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937060094.828, "dur": 4.051, + "args": { + "External id": 975729,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937060100.882, "dur": 196.698, + "args": { + "External id": 975730,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937060102.896, "dur": 0.458, + "args": { + "External id": 975731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937060104.821, "dur": 29.015, + "args": { + "External id": 975732,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937060135.539, "dur": 3.564, + "args": { + "External id": 975733,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937060137.648, "dur": 1.220, + "args": { + "External id": 975734,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937060140.014, "dur": 27.119, + "args": { + "External id": 975735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937060141.146, "dur": 1.595, + "args": { + "External id": 975736,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937060146.661, "dur": 20.115, + "args": { + "External id": 975737,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937060151.187, "dur": 2.906, + "args": { + "External id": 975738,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937060168.633, "dur": 25.011, + "args": { + "External id": 975739,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937060195.690, "dur": 14.905, + "args": { + "External id": 975740,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937060213.616, "dur": 14.603, + "args": { + "External id": 975741,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937060229.952, "dur": 13.837, + "args": { + "External id": 975742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937060245.768, "dur": 20.993, + "args": { + "External id": 975743,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937060248.063, "dur": 1.665, + "args": { + "External id": 975744,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937060251.624, "dur": 0.791, + "args": { + "External id": 975745,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937060270.609, "dur": 13.306, + "args": { + "External id": 975746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937060285.256, "dur": 11.041, + "args": { + "External id": 975747,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937060307.504, "dur": 2.390, + "args": { + "External id": 975748,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937060321.029, "dur": 4.061, + "args": { + "External id": 975749,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937060323.460, "dur": 0.573, + "args": { + "External id": 975750,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937060408.512, "dur": 64.231, + "args": { + "External id": 975751,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937060478.209, "dur": 5.367, + "args": { + "External id": 975752,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937060481.294, "dur": 1.209, + "args": { + "External id": 975753,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937060487.616, "dur": 28.360, + "args": { + "External id": 975754,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937060521.109, "dur": 5.779, + "args": { + "External id": 975755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937060522.995, "dur": 3.136, + "args": { + "External id": 975756,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937060524.837, "dur": 1.070, + "args": { + "External id": 975757,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937060529.948, "dur": 46.528, + "args": { + "External id": 975758,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937060531.046, "dur": 44.473, + "args": { + "External id": 975759,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937060581.084, "dur": 16.106, + "args": { + "External id": 975760,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937060606.425, "dur": 4.189, + "args": { + "External id": 975761,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937060608.870, "dur": 0.773, + "args": { + "External id": 975762,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937060615.218, "dur": 53.435, + "args": { + "External id": 975763,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937060616.341, "dur": 5.162, + "args": { + "External id": 975764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937060617.253, "dur": 3.549, + "args": { + "External id": 975765,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937060618.313, "dur": 2.229, + "args": { + "External id": 975766,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937060624.634, "dur": 43.544, + "args": { + "External id": 975767,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937060625.243, "dur": 42.224, + "args": { + "External id": 975768,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937060673.276, "dur": 3.779, + "args": { + "External id": 975769,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937060675.146, "dur": 0.504, + "args": { + "External id": 975770,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937060683.540, "dur": 1.606, + "args": { + "External id": 975771,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937060693.950, "dur": 8.499, + "args": { + "External id": 975772,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937060698.483, "dur": 3.587, + "args": { + "External id": 975773,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937060798.230, "dur": 189.621, + "args": { + "External id": 975774,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937060800.832, "dur": 2.009, + "args": { + "External id": 975775,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937060804.678, "dur": 182.731, + "args": { + "External id": 975776,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937060806.399, "dur": 0.414, + "args": { + "External id": 975777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937060809.178, "dur": 21.686, + "args": { + "External id": 975778,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937060832.663, "dur": 2.692, + "args": { + "External id": 975779,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937060834.616, "dur": 0.530, + "args": { + "External id": 975780,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937060836.230, "dur": 24.762, + "args": { + "External id": 975781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937060839.765, "dur": 3.014, + "args": { + "External id": 975782,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937060844.092, "dur": 16.573, + "args": { + "External id": 975783,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937060846.355, "dur": 2.309, + "args": { + "External id": 975784,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937060862.426, "dur": 21.582, + "args": { + "External id": 975785,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937060885.834, "dur": 13.550, + "args": { + "External id": 975786,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937060902.314, "dur": 14.905, + "args": { + "External id": 975787,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937060918.582, "dur": 12.867, + "args": { + "External id": 975788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937060933.430, "dur": 22.787, + "args": { + "External id": 975789,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937060935.728, "dur": 1.540, + "args": { + "External id": 975790,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937060941.177, "dur": 0.526, + "args": { + "External id": 975791,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937060957.995, "dur": 13.419, + "args": { + "External id": 975792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937060972.554, "dur": 13.883, + "args": { + "External id": 975793,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937060994.334, "dur": 1.832, + "args": { + "External id": 975794,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937061004.682, "dur": 23.743, + "args": { + "External id": 975795,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937061025.327, "dur": 1.297, + "args": { + "External id": 975796,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937061148.579, "dur": 67.646, + "args": { + "External id": 975797,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937061225.234, "dur": 6.277, + "args": { + "External id": 975798,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937061228.816, "dur": 1.021, + "args": { + "External id": 975799,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937061233.008, "dur": 29.312, + "args": { + "External id": 975800,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937061267.705, "dur": 7.556, + "args": { + "External id": 975801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937061269.323, "dur": 5.121, + "args": { + "External id": 975802,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937061271.075, "dur": 3.046, + "args": { + "External id": 975803,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937061281.091, "dur": 47.267, + "args": { + "External id": 975804,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937061282.374, "dur": 45.131, + "args": { + "External id": 975805,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937061332.807, "dur": 18.818, + "args": { + "External id": 975806,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937061358.169, "dur": 3.867, + "args": { + "External id": 975807,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937061360.473, "dur": 0.603, + "args": { + "External id": 975808,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937061366.742, "dur": 54.251, + "args": { + "External id": 975809,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937061367.960, "dur": 6.007, + "args": { + "External id": 975810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937061371.194, "dur": 2.060, + "args": { + "External id": 975811,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937061372.375, "dur": 0.617, + "args": { + "External id": 975812,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937061374.788, "dur": 45.858, + "args": { + "External id": 975813,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937061375.638, "dur": 44.076, + "args": { + "External id": 975814,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937061425.166, "dur": 3.957, + "args": { + "External id": 975815,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937061427.227, "dur": 0.514, + "args": { + "External id": 975816,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937061435.957, "dur": 1.748, + "args": { + "External id": 975817,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937061448.232, "dur": 7.029, + "args": { + "External id": 975818,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937061450.533, "dur": 4.419, + "args": { + "External id": 975819,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937061546.854, "dur": 221.867, + "args": { + "External id": 975820,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937061549.334, "dur": 2.365, + "args": { + "External id": 975821,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937061554.730, "dur": 213.470, + "args": { + "External id": 975822,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937061556.240, "dur": 0.417, + "args": { + "External id": 975823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937061560.598, "dur": 22.181, + "args": { + "External id": 975824,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937061584.678, "dur": 25.735, + "args": { + "External id": 975825,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937061607.792, "dur": 2.307, + "args": { + "External id": 975826,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937061611.353, "dur": 22.775, + "args": { + "External id": 975827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937061612.952, "dur": 1.759, + "args": { + "External id": 975828,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937061616.033, "dur": 17.738, + "args": { + "External id": 975829,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937061618.464, "dur": 2.849, + "args": { + "External id": 975830,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937061635.791, "dur": 24.897, + "args": { + "External id": 975831,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937061662.472, "dur": 14.704, + "args": { + "External id": 975832,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937061682.694, "dur": 14.094, + "args": { + "External id": 975833,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937061698.403, "dur": 13.483, + "args": { + "External id": 975834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937061714.212, "dur": 24.654, + "args": { + "External id": 975835,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937061716.723, "dur": 1.553, + "args": { + "External id": 975836,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937061724.000, "dur": 0.696, + "args": { + "External id": 975837,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937061740.379, "dur": 12.586, + "args": { + "External id": 975838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937061756.631, "dur": 10.336, + "args": { + "External id": 975839,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937061776.389, "dur": 1.677, + "args": { + "External id": 975840,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937061787.151, "dur": 4.202, + "args": { + "External id": 975841,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937061789.849, "dur": 0.360, + "args": { + "External id": 975842,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937061866.246, "dur": 57.681, + "args": { + "External id": 975843,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937061929.273, "dur": 6.301, + "args": { + "External id": 975844,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937061931.931, "dur": 2.362, + "args": { + "External id": 975845,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937061937.148, "dur": 25.387, + "args": { + "External id": 975846,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937061967.697, "dur": 7.157, + "args": { + "External id": 975847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937061971.194, "dur": 2.896, + "args": { + "External id": 975848,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937061972.920, "dur": 0.989, + "args": { + "External id": 975849,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937061977.694, "dur": 67.011, + "args": { + "External id": 975850,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937061978.735, "dur": 64.607, + "args": { + "External id": 975851,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937062050.880, "dur": 57.511, + "args": { + "External id": 975852,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937062118.937, "dur": 7.957, + "args": { + "External id": 975853,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937062124.506, "dur": 0.956, + "args": { + "External id": 975854,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937062132.308, "dur": 57.371, + "args": { + "External id": 975855,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937062133.167, "dur": 4.072, + "args": { + "External id": 975856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937062134.320, "dur": 2.261, + "args": { + "External id": 975857,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937062135.681, "dur": 0.746, + "args": { + "External id": 975858,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937062137.967, "dur": 51.281, + "args": { + "External id": 975859,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937062138.808, "dur": 49.576, + "args": { + "External id": 975860,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937062196.696, "dur": 4.058, + "args": { + "External id": 975861,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937062198.674, "dur": 0.562, + "args": { + "External id": 975862,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937062208.798, "dur": 1.887, + "args": { + "External id": 975863,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937062220.468, "dur": 8.065, + "args": { + "External id": 975864,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937062222.988, "dur": 5.246, + "args": { + "External id": 975865,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937062337.467, "dur": 284.119, + "args": { + "External id": 975866,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937062340.048, "dur": 4.047, + "args": { + "External id": 975867,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937062345.575, "dur": 275.310, + "args": { + "External id": 975868,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937062347.196, "dur": 0.526, + "args": { + "External id": 975869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937062349.334, "dur": 24.383, + "args": { + "External id": 975870,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937062375.500, "dur": 5.452, + "args": { + "External id": 975871,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937062379.994, "dur": 0.660, + "args": { + "External id": 975872,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937062457.872, "dur": 29.380, + "args": { + "External id": 975873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937062462.743, "dur": 2.340, + "args": { + "External id": 975874,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937062466.761, "dur": 19.962, + "args": { + "External id": 975875,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937062469.941, "dur": 3.173, + "args": { + "External id": 975876,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937062488.997, "dur": 26.302, + "args": { + "External id": 975877,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937062517.110, "dur": 14.664, + "args": { + "External id": 975878,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937062534.842, "dur": 15.385, + "args": { + "External id": 975879,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937062551.649, "dur": 14.020, + "args": { + "External id": 975880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937062567.649, "dur": 24.664, + "args": { + "External id": 975881,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937062571.057, "dur": 1.485, + "args": { + "External id": 975882,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937062576.649, "dur": 0.638, + "args": { + "External id": 975883,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937062593.938, "dur": 12.986, + "args": { + "External id": 975884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937062608.061, "dur": 11.553, + "args": { + "External id": 975885,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937062630.129, "dur": 2.151, + "args": { + "External id": 975886,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937062643.258, "dur": 3.884, + "args": { + "External id": 975887,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937062645.699, "dur": 0.445, + "args": { + "External id": 975888,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937062725.537, "dur": 64.164, + "args": { + "External id": 975889,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937062797.814, "dur": 4.840, + "args": { + "External id": 975890,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937062800.643, "dur": 0.784, + "args": { + "External id": 975891,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937062804.179, "dur": 31.498, + "args": { + "External id": 975892,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937062840.431, "dur": 5.730, + "args": { + "External id": 975893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937062842.321, "dur": 3.123, + "args": { + "External id": 975894,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937062843.940, "dur": 1.295, + "args": { + "External id": 975895,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937062852.075, "dur": 45.832, + "args": { + "External id": 975896,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937062853.316, "dur": 43.768, + "args": { + "External id": 975897,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937062902.444, "dur": 15.359, + "args": { + "External id": 975898,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937062924.430, "dur": 4.031, + "args": { + "External id": 975899,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937062926.798, "dur": 0.764, + "args": { + "External id": 975900,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937062932.833, "dur": 52.183, + "args": { + "External id": 975901,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937062933.914, "dur": 5.771, + "args": { + "External id": 975902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937062937.223, "dur": 1.841, + "args": { + "External id": 975903,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937062938.269, "dur": 0.635, + "args": { + "External id": 975904,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937062940.389, "dur": 44.220, + "args": { + "External id": 975905,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937062940.901, "dur": 42.844, + "args": { + "External id": 975906,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937062989.370, "dur": 3.909, + "args": { + "External id": 975907,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937062991.458, "dur": 0.606, + "args": { + "External id": 975908,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937062999.232, "dur": 1.587, + "args": { + "External id": 975909,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937063032.145, "dur": 9.001, + "args": { + "External id": 975910,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937063035.324, "dur": 5.225, + "args": { + "External id": 975911,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937063191.457, "dur": 204.237, + "args": { + "External id": 975912,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937063197.328, "dur": 3.724, + "args": { + "External id": 975913,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937063202.784, "dur": 192.315, + "args": { + "External id": 975914,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937063204.217, "dur": 0.467, + "args": { + "External id": 975915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937063208.752, "dur": 25.739, + "args": { + "External id": 975916,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937063236.333, "dur": 5.075, + "args": { + "External id": 975917,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937063240.265, "dur": 0.873, + "args": { + "External id": 975918,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937063242.459, "dur": 22.503, + "args": { + "External id": 975919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937063243.506, "dur": 1.704, + "args": { + "External id": 975920,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937063246.730, "dur": 17.812, + "args": { + "External id": 975921,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937063249.620, "dur": 2.234, + "args": { + "External id": 975922,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937063266.771, "dur": 24.462, + "args": { + "External id": 975923,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937063292.813, "dur": 18.267, + "args": { + "External id": 975924,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937063314.111, "dur": 14.914, + "args": { + "External id": 975925,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937063330.559, "dur": 13.351, + "args": { + "External id": 975926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937063348.183, "dur": 20.325, + "args": { + "External id": 975927,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937063350.559, "dur": 1.542, + "args": { + "External id": 975928,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937063354.066, "dur": 0.487, + "args": { + "External id": 975929,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937063369.934, "dur": 12.426, + "args": { + "External id": 975930,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937063383.540, "dur": 10.597, + "args": { + "External id": 975931,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937063403.642, "dur": 2.152, + "args": { + "External id": 975932,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937063416.151, "dur": 4.247, + "args": { + "External id": 975933,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937063418.734, "dur": 0.621, + "args": { + "External id": 975934,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937063497.405, "dur": 67.105, + "args": { + "External id": 975935,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937063570.059, "dur": 4.946, + "args": { + "External id": 975936,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937063572.970, "dur": 0.752, + "args": { + "External id": 975937,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937063576.589, "dur": 29.806, + "args": { + "External id": 975938,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937063611.659, "dur": 8.285, + "args": { + "External id": 975939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937063613.332, "dur": 5.842, + "args": { + "External id": 975940,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937063617.555, "dur": 1.433, + "args": { + "External id": 975941,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937063622.894, "dur": 48.589, + "args": { + "External id": 975942,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937063624.022, "dur": 46.786, + "args": { + "External id": 975943,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937063675.455, "dur": 18.914, + "args": { + "External id": 975944,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937063699.788, "dur": 29.764, + "args": { + "External id": 975945,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937063702.392, "dur": 26.707, + "args": { + "External id": 975946,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937063707.565, "dur": 2.576, + "args": { + "External id": 975947,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 14991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345937063736.713, "dur": 34.487, + "args": { + "External id": 975948,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 14992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345937063738.688, "dur": 32.220, + "args": { + "External id": 975949,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 14993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937063745.705, "dur": 3.862, + "args": { + "External id": 975950,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937063750.819, "dur": 19.532, + "args": { + "External id": 975951,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2338706, + "ts": 6345937063785.706, "dur": 7.626, + "args": { + "External id": 975952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 14996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2338706, + "ts": 6345937063789.160, "dur": 3.874, + "args": { + "External id": 975953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 14997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2338706, + "ts": 6345937063794.522, "dur": 1.560, + "args": { + "External id": 975954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2338706, + "ts": 6345937063795.090, "dur": 0.887, + "args": { + "External id": 975955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937063844.840, "dur": 27.427, + "args": { + "External id": 975956,"Sequence number": 10552443, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15000 + } + }, + { + "ph": "s", "id": 23, "pid": 2338706, "tid": 2338706, "ts": 6345937063844.840, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937063878.872, "dur": 20.787, + "args": { + "External id": 975957,"Sequence number": 10552444, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 15001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937063895.596, "dur": 1.364, + "args": { + "External id": 975958,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2338706, + "ts": 6345937063902.430, "dur": 10.203, + "args": { + "External id": 975959,"Sequence number": 10552444, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 15003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937063907.308, "dur": 4.099, + "args": { + "External id": 975960,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937063913.987, "dur": 5.164, + "args": { + "External id": 975961,"Sequence number": 10552444, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 15005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937063917.912, "dur": 0.544, + "args": { + "External id": 975962,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 15006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937063924.409, "dur": 6.351, + "args": { + "External id": 975963,"Sequence number": 10552444, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15007 + } + }, + { + "ph": "s", "id": 22, "pid": 2338706, "tid": 2338706, "ts": 6345937063924.409, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937063928.167, "dur": 0.853, + "args": { + "External id": 975964,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937063931.892, "dur": 4.960, + "args": { + "External id": 975965,"Sequence number": 10552445, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15009 + } + }, + { + "ph": "s", "id": 21, "pid": 2338706, "tid": 2338706, "ts": 6345937063931.892, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937063935.522, "dur": 0.410, + "args": { + "External id": 975966,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2338706, + "ts": 6345937063937.939, "dur": 7.027, + "args": { + "External id": 975967,"Sequence number": 10552446, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 15011 + } + }, + { + "ph": "s", "id": 20, "pid": 2338706, "tid": 2338706, "ts": 6345937063937.939, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937063943.311, "dur": 0.683, + "args": { + "External id": 975968,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937063946.150, "dur": 3.642, + "args": { + "External id": 975969,"Sequence number": 10552447, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 15013 + } + }, + { + "ph": "s", "id": 19, "pid": 2338706, "tid": 2338706, "ts": 6345937063946.150, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937063948.454, "dur": 0.478, + "args": { + "External id": 975970,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 15014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345937063954.269, "dur": 37.405, + "args": { + "External id": 975971,"Sequence number": 10552448, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345937063956.255, "dur": 35.111, + "args": { + "External id": 975972,"Sequence number": 10552448, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937063959.045, "dur": 9.353, + "args": { + "External id": 975973,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 15017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937063963.773, "dur": 4.002, + "args": { + "External id": 975974,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937063969.449, "dur": 21.313, + "args": { + "External id": 975975,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 15019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937064047.899, "dur": 6.097, + "args": { + "External id": 975976,"Sequence number": 10552448, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 15020 + } + }, + { + "ph": "s", "id": 18, "pid": 2338706, "tid": 2338706, "ts": 6345937064047.899, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937064097.232, "dur": 5.281, + "args": { + "External id": 975977,"Sequence number": 10552449, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345937064141.459, "dur": 45233.055, + "args": { + "External id": 975978,"Sequence number": 10552449, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 15022 + } + }, + { + "ph": "s", "id": 17, "pid": 2338706, "tid": 2338706, "ts": 6345937064141.459, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345937064160.906, "dur": 37.458, + "args": { + "External id": 975979,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345937064161.654, "dur": 36.473, + "args": { + "External id": 975980,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937064164.305, "dur": 9.474, + "args": { + "External id": 975981,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937064168.474, "dur": 4.751, + "args": { + "External id": 975982,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937064174.796, "dur": 22.797, + "args": { + "External id": 975983,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 15027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937064217.488, "dur": 29.877, + "args": { + "External id": 975984,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937064218.685, "dur": 5.920, + "args": { + "External id": 975985,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937064220.760, "dur": 3.457, + "args": { + "External id": 975986,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345937064226.134, "dur": 20.961, + "args": { + "External id": 975987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937064227.918, "dur": 18.639, + "args": { + "External id": 975988,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937064251.356, "dur": 18.902, + "args": { + "External id": 975989,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937064252.075, "dur": 3.993, + "args": { + "External id": 975990,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937064253.434, "dur": 2.313, + "args": { + "External id": 975991,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345937064256.682, "dur": 13.343, + "args": { + "External id": 975992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937064257.288, "dur": 12.352, + "args": { + "External id": 975993,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 15037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2338706, + "ts": 6345937064279.494, "dur": 18.074, + "args": { + "External id": 975994,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 15038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937064281.245, "dur": 3.169, + "args": { + "External id": 975995,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345937064285.156, "dur": 12.109, + "args": { + "External id": 975996,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 15040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937064286.381, "dur": 10.540, + "args": { + "External id": 975997,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2338706, + "ts": 6345937064303.454, "dur": 27.123, + "args": { + "External id": 975998,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937064333.675, "dur": 57.103, + "args": { + "External id": 975999,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937064335.732, "dur": 54.413, + "args": { + "External id": 976000,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937064340.422, "dur": 0.862, + "args": { + "External id": 976001,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345937064342.611, "dur": 29.463, + "args": { + "External id": 976002,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345937064345.989, "dur": 25.846, + "args": { + "External id": 976003,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 15047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937064348.814, "dur": 5.273, + "args": { + "External id": 976004,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937064357.216, "dur": 14.057, + "args": { + "External id": 976005,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 15049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2338706, + "ts": 6345937064395.927, "dur": 38234.982, + "args": { + "External id": 976006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2338706, + "ts": 6345937064397.660, "dur": 38232.170, + "args": { + "External id": 976007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937102645.178, "dur": 9.438, + "args": { + "External id": 976008,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937102650.424, "dur": 1.187, + "args": { + "External id": 976009,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937102661.703, "dur": 131.018, + "args": { + "External id": 976010,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937102663.397, "dur": 12.013, + "args": { + "External id": 976011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937102669.793, "dur": 4.642, + "args": { + "External id": 976012,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937102673.096, "dur": 0.978, + "args": { + "External id": 976013,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937102676.745, "dur": 115.076, + "args": { + "External id": 976014,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937102679.579, "dur": 111.157, + "args": { + "External id": 976015,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937102798.292, "dur": 5.829, + "args": { + "External id": 976016,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937102801.265, "dur": 1.202, + "args": { + "External id": 976017,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937102812.297, "dur": 3.401, + "args": { + "External id": 976018,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937102828.176, "dur": 8.152, + "args": { + "External id": 976019,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937102831.510, "dur": 4.496, + "args": { + "External id": 976020,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937103035.484, "dur": 312.898, + "args": { + "External id": 976021,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937103040.026, "dur": 6.834, + "args": { + "External id": 976022,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937103049.459, "dur": 297.495, + "args": { + "External id": 976023,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937103082.454, "dur": 0.676, + "args": { + "External id": 976024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937103089.141, "dur": 41.892, + "args": { + "External id": 976025,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937103133.701, "dur": 6.498, + "args": { + "External id": 976026,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937103138.696, "dur": 1.132, + "args": { + "External id": 976027,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937103142.696, "dur": 31.758, + "args": { + "External id": 976028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937103143.805, "dur": 1.652, + "args": { + "External id": 976029,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937103146.949, "dur": 27.182, + "args": { + "External id": 976030,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937103152.168, "dur": 4.716, + "args": { + "External id": 976031,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937103177.825, "dur": 28.713, + "args": { + "External id": 976032,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937103209.587, "dur": 17.789, + "args": { + "External id": 976033,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937103231.046, "dur": 23.179, + "args": { + "External id": 976034,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937103259.731, "dur": 17.801, + "args": { + "External id": 976035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937103279.900, "dur": 28.661, + "args": { + "External id": 976036,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937103284.011, "dur": 2.885, + "args": { + "External id": 976037,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937103289.133, "dur": 0.747, + "args": { + "External id": 976038,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937103311.346, "dur": 15.830, + "args": { + "External id": 976039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937103328.687, "dur": 16.492, + "args": { + "External id": 976040,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937103363.611, "dur": 3.728, + "args": { + "External id": 976041,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937103377.223, "dur": 5.933, + "args": { + "External id": 976042,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937103381.136, "dur": 0.850, + "args": { + "External id": 976043,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937103483.302, "dur": 87.498, + "args": { + "External id": 976044,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937103577.376, "dur": 10.525, + "args": { + "External id": 976045,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937103580.302, "dur": 4.348, + "args": { + "External id": 976046,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937103589.735, "dur": 33.688, + "args": { + "External id": 976047,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937103630.030, "dur": 9.244, + "args": { + "External id": 976048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937103634.216, "dur": 4.034, + "args": { + "External id": 976049,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937103636.401, "dur": 1.592, + "args": { + "External id": 976050,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937103642.790, "dur": 53.752, + "args": { + "External id": 976051,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937103644.025, "dur": 51.703, + "args": { + "External id": 976052,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937103701.766, "dur": 17.942, + "args": { + "External id": 976053,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937103726.816, "dur": 7.019, + "args": { + "External id": 976054,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937103731.862, "dur": 0.721, + "args": { + "External id": 976055,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937103738.923, "dur": 56.149, + "args": { + "External id": 976056,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937103739.981, "dur": 7.827, + "args": { + "External id": 976057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937103740.748, "dur": 6.312, + "args": { + "External id": 976058,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937103742.045, "dur": 4.830, + "args": { + "External id": 976059,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937103748.541, "dur": 46.080, + "args": { + "External id": 976060,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937103749.221, "dur": 44.847, + "args": { + "External id": 976061,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937103802.293, "dur": 24.188, + "args": { + "External id": 976062,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937103823.343, "dur": 0.576, + "args": { + "External id": 976063,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937103833.414, "dur": 1.837, + "args": { + "External id": 976064,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937103844.770, "dur": 8.099, + "args": { + "External id": 976065,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937103846.926, "dur": 5.629, + "args": { + "External id": 976066,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937103966.003, "dur": 267.958, + "args": { + "External id": 976067,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937103968.354, "dur": 4.401, + "args": { + "External id": 976068,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937103974.611, "dur": 258.739, + "args": { + "External id": 976069,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937103976.210, "dur": 0.416, + "args": { + "External id": 976070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937103977.760, "dur": 24.729, + "args": { + "External id": 976071,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937104004.397, "dur": 19.299, + "args": { + "External id": 976072,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937104006.497, "dur": 0.572, + "args": { + "External id": 976073,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937104025.983, "dur": 68.993, + "args": { + "External id": 976074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937104027.670, "dur": 2.665, + "args": { + "External id": 976075,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937104031.825, "dur": 62.271, + "args": { + "External id": 976076,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937104039.832, "dur": 3.216, + "args": { + "External id": 976077,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937104097.493, "dur": 25.485, + "args": { + "External id": 976078,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937104125.002, "dur": 16.282, + "args": { + "External id": 976079,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937104144.686, "dur": 14.854, + "args": { + "External id": 976080,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937104161.186, "dur": 13.964, + "args": { + "External id": 976081,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937104177.538, "dur": 23.876, + "args": { + "External id": 976082,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937104179.923, "dur": 2.373, + "args": { + "External id": 976083,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937104184.652, "dur": 0.856, + "args": { + "External id": 976084,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937104205.763, "dur": 13.125, + "args": { + "External id": 976085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937104220.139, "dur": 11.701, + "args": { + "External id": 976086,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937104244.152, "dur": 2.929, + "args": { + "External id": 976087,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937104259.419, "dur": 4.516, + "args": { + "External id": 976088,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937104262.349, "dur": 0.495, + "args": { + "External id": 976089,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937104351.010, "dur": 68.270, + "args": { + "External id": 976090,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937104429.827, "dur": 5.196, + "args": { + "External id": 976091,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937104432.763, "dur": 0.789, + "args": { + "External id": 976092,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937104436.784, "dur": 29.450, + "args": { + "External id": 976093,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937104473.773, "dur": 9.004, + "args": { + "External id": 976094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937104475.523, "dur": 6.479, + "args": { + "External id": 976095,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937104477.404, "dur": 4.326, + "args": { + "External id": 976096,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937104486.191, "dur": 50.031, + "args": { + "External id": 976097,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937104487.353, "dur": 48.108, + "args": { + "External id": 976098,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937104540.857, "dur": 20.893, + "args": { + "External id": 976099,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937104568.498, "dur": 6.363, + "args": { + "External id": 976100,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937104573.147, "dur": 0.649, + "args": { + "External id": 976101,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937104579.783, "dur": 55.563, + "args": { + "External id": 976102,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937104580.862, "dur": 3.494, + "args": { + "External id": 976103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937104581.766, "dur": 1.929, + "args": { + "External id": 976104,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937104582.986, "dur": 0.539, + "args": { + "External id": 976105,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937104584.990, "dur": 49.901, + "args": { + "External id": 976106,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937104588.221, "dur": 45.838, + "args": { + "External id": 976107,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937104640.125, "dur": 4.314, + "args": { + "External id": 976108,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937104642.300, "dur": 0.679, + "args": { + "External id": 976109,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937104650.871, "dur": 1.968, + "args": { + "External id": 976110,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937104666.711, "dur": 11.510, + "args": { + "External id": 976111,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937104671.651, "dur": 6.231, + "args": { + "External id": 976112,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937104785.422, "dur": 203.990, + "args": { + "External id": 976113,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937104788.025, "dur": 3.704, + "args": { + "External id": 976114,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937104793.168, "dur": 195.702, + "args": { + "External id": 976115,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937104794.909, "dur": 0.462, + "args": { + "External id": 976116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937104796.645, "dur": 30.389, + "args": { + "External id": 976117,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937104828.945, "dur": 3.301, + "args": { + "External id": 976118,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937104830.960, "dur": 1.009, + "args": { + "External id": 976119,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937104833.615, "dur": 31.403, + "args": { + "External id": 976120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937104836.818, "dur": 1.393, + "args": { + "External id": 976121,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937104839.466, "dur": 25.149, + "args": { + "External id": 976122,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937104844.391, "dur": 2.742, + "args": { + "External id": 976123,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937104866.685, "dur": 23.133, + "args": { + "External id": 976124,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937104891.198, "dur": 13.625, + "args": { + "External id": 976125,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937104907.790, "dur": 15.057, + "args": { + "External id": 976126,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937104924.265, "dur": 12.506, + "args": { + "External id": 976127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937104938.775, "dur": 21.820, + "args": { + "External id": 976128,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937104940.641, "dur": 1.510, + "args": { + "External id": 976129,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937104946.366, "dur": 0.864, + "args": { + "External id": 976130,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937104962.242, "dur": 12.817, + "args": { + "External id": 976131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937104976.133, "dur": 11.547, + "args": { + "External id": 976132,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937104996.725, "dur": 1.935, + "args": { + "External id": 976133,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937105028.010, "dur": 6.489, + "args": { + "External id": 976134,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937105032.075, "dur": 0.895, + "args": { + "External id": 976135,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937105160.619, "dur": 73.377, + "args": { + "External id": 976136,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937105242.702, "dur": 6.715, + "args": { + "External id": 976137,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937105246.316, "dur": 1.215, + "args": { + "External id": 976138,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937105251.108, "dur": 32.156, + "args": { + "External id": 976139,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937105288.836, "dur": 5.938, + "args": { + "External id": 976140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937105290.486, "dur": 3.567, + "args": { + "External id": 976141,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937105292.673, "dur": 1.113, + "args": { + "External id": 976142,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937105300.400, "dur": 51.613, + "args": { + "External id": 976143,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937105301.826, "dur": 49.304, + "args": { + "External id": 976144,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937105357.052, "dur": 20.584, + "args": { + "External id": 976145,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937105384.771, "dur": 4.285, + "args": { + "External id": 976146,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937105387.245, "dur": 0.749, + "args": { + "External id": 976147,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937105394.038, "dur": 56.020, + "args": { + "External id": 976148,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937105394.930, "dur": 8.622, + "args": { + "External id": 976149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937105398.815, "dur": 4.005, + "args": { + "External id": 976150,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937105400.031, "dur": 2.595, + "args": { + "External id": 976151,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937105404.347, "dur": 45.300, + "args": { + "External id": 976152,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937105404.945, "dur": 44.121, + "args": { + "External id": 976153,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937105455.800, "dur": 3.555, + "args": { + "External id": 976154,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937105457.516, "dur": 0.655, + "args": { + "External id": 976155,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937105466.147, "dur": 1.620, + "args": { + "External id": 976156,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937105479.106, "dur": 7.882, + "args": { + "External id": 976157,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937105481.435, "dur": 5.237, + "args": { + "External id": 976158,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937105585.280, "dur": 195.802, + "args": { + "External id": 976159,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937105588.049, "dur": 2.302, + "args": { + "External id": 976160,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937105591.625, "dur": 188.948, + "args": { + "External id": 976161,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937105593.043, "dur": 0.378, + "args": { + "External id": 976162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937105598.419, "dur": 23.233, + "args": { + "External id": 976163,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937105623.606, "dur": 5.081, + "args": { + "External id": 976164,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937105625.873, "dur": 2.523, + "args": { + "External id": 976165,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937105629.711, "dur": 23.656, + "args": { + "External id": 976166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937105630.694, "dur": 3.137, + "args": { + "External id": 976167,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937105635.207, "dur": 17.789, + "args": { + "External id": 976168,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937105638.029, "dur": 2.628, + "args": { + "External id": 976169,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937105654.834, "dur": 20.674, + "args": { + "External id": 976170,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937105676.940, "dur": 15.417, + "args": { + "External id": 976171,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937105694.878, "dur": 14.013, + "args": { + "External id": 976172,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937105710.573, "dur": 13.794, + "args": { + "External id": 976173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937105728.746, "dur": 20.517, + "args": { + "External id": 976174,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937105731.086, "dur": 1.703, + "args": { + "External id": 976175,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937105734.606, "dur": 0.801, + "args": { + "External id": 976176,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937105750.960, "dur": 14.824, + "args": { + "External id": 976177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937105767.006, "dur": 12.246, + "args": { + "External id": 976178,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937105788.061, "dur": 1.936, + "args": { + "External id": 976179,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937105800.210, "dur": 3.756, + "args": { + "External id": 976180,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937105802.436, "dur": 0.464, + "args": { + "External id": 976181,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937105879.165, "dur": 54.139, + "args": { + "External id": 976182,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937105939.206, "dur": 4.777, + "args": { + "External id": 976183,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937105941.760, "dur": 0.879, + "args": { + "External id": 976184,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937105945.370, "dur": 24.993, + "args": { + "External id": 976185,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937105975.234, "dur": 8.644, + "args": { + "External id": 976186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937105976.817, "dur": 6.293, + "args": { + "External id": 976187,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937105980.521, "dur": 2.334, + "args": { + "External id": 976188,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937105986.649, "dur": 113.746, + "args": { + "External id": 976189,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937105987.639, "dur": 111.437, + "args": { + "External id": 976190,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937106107.982, "dur": 20.458, + "args": { + "External id": 976191,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937106136.339, "dur": 7.338, + "args": { + "External id": 976192,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937106139.348, "dur": 2.943, + "args": { + "External id": 976193,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937106148.248, "dur": 57.647, + "args": { + "External id": 976194,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937106151.869, "dur": 4.078, + "args": { + "External id": 976195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937106153.029, "dur": 2.293, + "args": { + "External id": 976196,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937106154.335, "dur": 0.821, + "args": { + "External id": 976197,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937106156.626, "dur": 48.802, + "args": { + "External id": 976198,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937106157.540, "dur": 46.955, + "args": { + "External id": 976199,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937106211.066, "dur": 3.948, + "args": { + "External id": 976200,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937106213.171, "dur": 0.531, + "args": { + "External id": 976201,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937106224.465, "dur": 1.777, + "args": { + "External id": 976202,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937106235.182, "dur": 7.160, + "args": { + "External id": 976203,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937106237.100, "dur": 4.915, + "args": { + "External id": 976204,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937106347.943, "dur": 212.433, + "args": { + "External id": 976205,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937106350.182, "dur": 2.140, + "args": { + "External id": 976206,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937106355.145, "dur": 204.693, + "args": { + "External id": 976207,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937106359.122, "dur": 0.437, + "args": { + "External id": 976208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937106360.717, "dur": 24.963, + "args": { + "External id": 976209,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937106387.602, "dur": 5.064, + "args": { + "External id": 976210,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937106389.874, "dur": 2.478, + "args": { + "External id": 976211,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937106393.996, "dur": 27.656, + "args": { + "External id": 976212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937106395.003, "dur": 4.003, + "args": { + "External id": 976213,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937106400.377, "dur": 20.925, + "args": { + "External id": 976214,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937106403.558, "dur": 3.183, + "args": { + "External id": 976215,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937106423.200, "dur": 23.542, + "args": { + "External id": 976216,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937106448.394, "dur": 17.012, + "args": { + "External id": 976217,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937106471.142, "dur": 16.761, + "args": { + "External id": 976218,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937106489.548, "dur": 15.046, + "args": { + "External id": 976219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937106506.586, "dur": 22.260, + "args": { + "External id": 976220,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937106508.542, "dur": 1.508, + "args": { + "External id": 976221,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937106511.870, "dur": 2.787, + "args": { + "External id": 976222,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937106530.396, "dur": 12.659, + "args": { + "External id": 976223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937106546.813, "dur": 11.538, + "args": { + "External id": 976224,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937106567.946, "dur": 1.937, + "args": { + "External id": 976225,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937106579.688, "dur": 4.028, + "args": { + "External id": 976226,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937106582.144, "dur": 0.398, + "args": { + "External id": 976227,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937106658.422, "dur": 60.920, + "args": { + "External id": 976228,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937106725.102, "dur": 4.885, + "args": { + "External id": 976229,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937106727.770, "dur": 0.893, + "args": { + "External id": 976230,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937106731.758, "dur": 27.493, + "args": { + "External id": 976231,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937106766.724, "dur": 5.910, + "args": { + "External id": 976232,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937106768.470, "dur": 3.390, + "args": { + "External id": 976233,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937106770.303, "dur": 1.283, + "args": { + "External id": 976234,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937106775.537, "dur": 46.375, + "args": { + "External id": 976235,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937106776.547, "dur": 44.593, + "args": { + "External id": 976236,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937106826.256, "dur": 16.277, + "args": { + "External id": 976237,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937106849.245, "dur": 6.225, + "args": { + "External id": 976238,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937106853.658, "dur": 0.729, + "args": { + "External id": 976239,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937106859.624, "dur": 53.026, + "args": { + "External id": 976240,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937106860.377, "dur": 5.579, + "args": { + "External id": 976241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937106861.065, "dur": 4.319, + "args": { + "External id": 976242,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937106862.329, "dur": 2.898, + "args": { + "External id": 976243,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937106866.723, "dur": 45.578, + "args": { + "External id": 976244,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937106869.280, "dur": 42.366, + "args": { + "External id": 976245,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937106917.841, "dur": 17.879, + "args": { + "External id": 976246,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937106933.489, "dur": 0.717, + "args": { + "External id": 976247,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937106943.795, "dur": 1.552, + "args": { + "External id": 976248,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937106954.562, "dur": 10.687, + "args": { + "External id": 976249,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937106959.115, "dur": 5.800, + "args": { + "External id": 976250,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937107135.678, "dur": 211.383, + "args": { + "External id": 976251,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937107138.535, "dur": 4.145, + "args": { + "External id": 976252,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937107144.405, "dur": 202.081, + "args": { + "External id": 976253,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937107146.232, "dur": 0.560, + "args": { + "External id": 976254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937107148.026, "dur": 27.716, + "args": { + "External id": 976255,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937107177.452, "dur": 4.199, + "args": { + "External id": 976256,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937107180.210, "dur": 1.064, + "args": { + "External id": 976257,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937107182.790, "dur": 27.332, + "args": { + "External id": 976258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937107186.378, "dur": 1.494, + "args": { + "External id": 976259,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937107189.108, "dur": 20.662, + "args": { + "External id": 976260,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937107194.045, "dur": 2.805, + "args": { + "External id": 976261,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937107211.654, "dur": 24.349, + "args": { + "External id": 976262,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937107237.611, "dur": 14.801, + "args": { + "External id": 976263,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937107255.199, "dur": 15.514, + "args": { + "External id": 976264,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937107272.252, "dur": 15.226, + "args": { + "External id": 976265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937107289.478, "dur": 25.491, + "args": { + "External id": 976266,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937107291.479, "dur": 2.019, + "args": { + "External id": 976267,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937107297.798, "dur": 0.743, + "args": { + "External id": 976268,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937107316.732, "dur": 14.781, + "args": { + "External id": 976269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937107332.806, "dur": 12.388, + "args": { + "External id": 976270,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937107355.919, "dur": 2.597, + "args": { + "External id": 976271,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937107369.473, "dur": 4.469, + "args": { + "External id": 976272,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937107372.399, "dur": 0.468, + "args": { + "External id": 976273,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937107454.351, "dur": 67.031, + "args": { + "External id": 976274,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937107527.652, "dur": 8.108, + "args": { + "External id": 976275,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937107533.028, "dur": 1.390, + "args": { + "External id": 976276,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937107537.391, "dur": 28.243, + "args": { + "External id": 976277,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937107570.912, "dur": 5.475, + "args": { + "External id": 976278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937107572.346, "dur": 3.087, + "args": { + "External id": 976279,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937107574.220, "dur": 0.956, + "args": { + "External id": 976280,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937107579.152, "dur": 48.920, + "args": { + "External id": 976281,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937107580.135, "dur": 47.053, + "args": { + "External id": 976282,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937107634.884, "dur": 17.155, + "args": { + "External id": 976283,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937107658.620, "dur": 4.215, + "args": { + "External id": 976284,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937107660.997, "dur": 0.683, + "args": { + "External id": 976285,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937107667.178, "dur": 56.762, + "args": { + "External id": 976286,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937107668.111, "dur": 7.252, + "args": { + "External id": 976287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937107668.829, "dur": 5.814, + "args": { + "External id": 976288,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937107672.341, "dur": 2.079, + "args": { + "External id": 976289,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937107676.110, "dur": 47.448, + "args": { + "External id": 976290,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937107676.642, "dur": 46.127, + "args": { + "External id": 976291,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937107728.755, "dur": 3.592, + "args": { + "External id": 976292,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937107730.625, "dur": 0.508, + "args": { + "External id": 976293,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937107738.221, "dur": 1.650, + "args": { + "External id": 976294,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937107748.666, "dur": 9.181, + "args": { + "External id": 976295,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937107753.633, "dur": 3.880, + "args": { + "External id": 976296,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937107850.799, "dur": 274.693, + "args": { + "External id": 976297,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937107853.007, "dur": 2.111, + "args": { + "External id": 976298,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937107856.260, "dur": 268.631, + "args": { + "External id": 976299,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937107857.297, "dur": 0.281, + "args": { + "External id": 976300,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937107860.658, "dur": 25.449, + "args": { + "External id": 976301,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937107887.749, "dur": 2.959, + "args": { + "External id": 976302,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937107889.578, "dur": 0.838, + "args": { + "External id": 976303,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937107894.107, "dur": 30.247, + "args": { + "External id": 976304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937107900.322, "dur": 2.995, + "args": { + "External id": 976305,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937107904.779, "dur": 19.164, + "args": { + "External id": 976306,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937107907.546, "dur": 2.526, + "args": { + "External id": 976307,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937107925.581, "dur": 23.452, + "args": { + "External id": 976308,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937107950.306, "dur": 14.825, + "args": { + "External id": 976309,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937107967.938, "dur": 16.064, + "args": { + "External id": 976310,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937107985.181, "dur": 15.080, + "args": { + "External id": 976311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937108002.247, "dur": 46.809, + "args": { + "External id": 976312,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937108006.344, "dur": 19.676, + "args": { + "External id": 976313,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937108029.431, "dur": 0.831, + "args": { + "External id": 976314,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937108051.049, "dur": 55.156, + "args": { + "External id": 976315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937108108.421, "dur": 14.721, + "args": { + "External id": 976316,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937108135.470, "dur": 3.040, + "args": { + "External id": 976317,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937108149.991, "dur": 4.926, + "args": { + "External id": 976318,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937108153.097, "dur": 0.688, + "args": { + "External id": 976319,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937108237.107, "dur": 66.569, + "args": { + "External id": 976320,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937108309.813, "dur": 4.778, + "args": { + "External id": 976321,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937108312.484, "dur": 0.845, + "args": { + "External id": 976322,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937108316.233, "dur": 31.052, + "args": { + "External id": 976323,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937108351.895, "dur": 10.070, + "args": { + "External id": 976324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937108353.972, "dur": 7.220, + "args": { + "External id": 976325,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937108358.628, "dur": 2.293, + "args": { + "External id": 976326,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937108364.963, "dur": 50.440, + "args": { + "External id": 976327,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937108366.202, "dur": 48.439, + "args": { + "External id": 976328,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937108420.147, "dur": 17.376, + "args": { + "External id": 976329,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937108444.272, "dur": 3.880, + "args": { + "External id": 976330,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937108446.329, "dur": 0.878, + "args": { + "External id": 976331,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937108452.388, "dur": 59.270, + "args": { + "External id": 976332,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937108455.822, "dur": 8.280, + "args": { + "External id": 976333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937108456.512, "dur": 6.906, + "args": { + "External id": 976334,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937108462.547, "dur": 0.716, + "args": { + "External id": 976335,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937108464.863, "dur": 46.359, + "args": { + "External id": 976336,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937108465.515, "dur": 44.927, + "args": { + "External id": 976337,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937108516.396, "dur": 3.606, + "args": { + "External id": 976338,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937108518.377, "dur": 0.465, + "args": { + "External id": 976339,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937108528.562, "dur": 1.614, + "args": { + "External id": 976340,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937108539.061, "dur": 8.627, + "args": { + "External id": 976341,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937108541.279, "dur": 6.109, + "args": { + "External id": 976342,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937108641.673, "dur": 190.378, + "args": { + "External id": 976343,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937108643.513, "dur": 2.349, + "args": { + "External id": 976344,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937108647.212, "dur": 184.288, + "args": { + "External id": 976345,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937108650.529, "dur": 0.465, + "args": { + "External id": 976346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937108652.050, "dur": 26.366, + "args": { + "External id": 976347,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937108680.001, "dur": 3.089, + "args": { + "External id": 976348,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937108682.047, "dur": 0.791, + "args": { + "External id": 976349,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937108684.187, "dur": 24.268, + "args": { + "External id": 976350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937108685.129, "dur": 1.553, + "args": { + "External id": 976351,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937108687.774, "dur": 20.138, + "args": { + "External id": 976352,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937108692.492, "dur": 2.344, + "args": { + "External id": 976353,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937108709.919, "dur": 23.141, + "args": { + "External id": 976354,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937108734.363, "dur": 14.080, + "args": { + "External id": 976355,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937108753.100, "dur": 13.868, + "args": { + "External id": 976356,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937108768.293, "dur": 12.515, + "args": { + "External id": 976357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937108782.572, "dur": 20.745, + "args": { + "External id": 976358,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937108784.383, "dur": 2.010, + "args": { + "External id": 976359,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937108788.507, "dur": 0.554, + "args": { + "External id": 976360,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937108804.903, "dur": 12.232, + "args": { + "External id": 976361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937108818.235, "dur": 11.852, + "args": { + "External id": 976362,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937108841.516, "dur": 2.060, + "args": { + "External id": 976363,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937108852.363, "dur": 3.929, + "args": { + "External id": 976364,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937108854.897, "dur": 0.472, + "args": { + "External id": 976365,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937108927.257, "dur": 54.624, + "args": { + "External id": 976366,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937108987.352, "dur": 5.351, + "args": { + "External id": 976367,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937108990.352, "dur": 1.180, + "args": { + "External id": 976368,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937108994.045, "dur": 47.999, + "args": { + "External id": 976369,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937109049.513, "dur": 45.930, + "args": { + "External id": 976370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937109051.175, "dur": 43.097, + "args": { + "External id": 976371,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937109092.368, "dur": 1.367, + "args": { + "External id": 976372,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937109099.598, "dur": 58.410, + "args": { + "External id": 976373,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937109101.058, "dur": 55.876, + "args": { + "External id": 976374,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937109163.045, "dur": 20.508, + "args": { + "External id": 976375,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937109189.644, "dur": 31.754, + "args": { + "External id": 976376,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937109192.296, "dur": 28.602, + "args": { + "External id": 976377,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937109201.000, "dur": 0.583, + "args": { + "External id": 976378,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345937109228.245, "dur": 33.609, + "args": { + "External id": 976379,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345937109230.608, "dur": 31.003, + "args": { + "External id": 976380,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 15424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937109237.947, "dur": 4.495, + "args": { + "External id": 976381,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937109243.812, "dur": 17.212, + "args": { + "External id": 976382,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2338706, + "ts": 6345937109275.403, "dur": 5.889, + "args": { + "External id": 976383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2338706, + "ts": 6345937109277.467, "dur": 3.472, + "args": { + "External id": 976384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2338706, + "ts": 6345937109333.505, "dur": 1.846, + "args": { + "External id": 976385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2338706, + "ts": 6345937109334.498, "dur": 0.753, + "args": { + "External id": 976386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937109393.931, "dur": 26.887, + "args": { + "External id": 976387,"Sequence number": 10552450, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937109423.348, "dur": 16.119, + "args": { + "External id": 976388,"Sequence number": 10552451, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15432 + } + }, + { + "ph": "s", "id": 16, "pid": 2338706, "tid": 2338706, "ts": 6345937109423.348, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937109446.986, "dur": 7.555, + "args": { + "External id": 976389,"Sequence number": 10552452, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 15433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937109451.423, "dur": 1.316, + "args": { + "External id": 976390,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2338706, + "ts": 6345937109457.495, "dur": 9.350, + "args": { + "External id": 976391,"Sequence number": 10552452, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 15435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937109461.915, "dur": 3.232, + "args": { + "External id": 976392,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937109468.239, "dur": 4.653, + "args": { + "External id": 976393,"Sequence number": 10552452, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 15437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937109471.774, "dur": 0.430, + "args": { + "External id": 976394,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 15438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937109477.672, "dur": 5.997, + "args": { + "External id": 976395,"Sequence number": 10552452, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15439 + } + }, + { + "ph": "s", "id": 15, "pid": 2338706, "tid": 2338706, "ts": 6345937109477.672, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937109481.177, "dur": 0.892, + "args": { + "External id": 976396,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937109484.939, "dur": 5.940, + "args": { + "External id": 976397,"Sequence number": 10552453, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15441 + } + }, + { + "ph": "s", "id": 14, "pid": 2338706, "tid": 2338706, "ts": 6345937109484.939, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937109489.507, "dur": 0.369, + "args": { + "External id": 976398,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2338706, + "ts": 6345937109494.131, "dur": 4.827, + "args": { + "External id": 976399,"Sequence number": 10552454, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 15443 + } + }, + { + "ph": "s", "id": 13, "pid": 2338706, "tid": 2338706, "ts": 6345937109494.131, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937109497.530, "dur": 0.470, + "args": { + "External id": 976400,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937109500.291, "dur": 4.023, + "args": { + "External id": 976401,"Sequence number": 10552455, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 15445 + } + }, + { + "ph": "s", "id": 12, "pid": 2338706, "tid": 2338706, "ts": 6345937109500.291, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937109502.615, "dur": 0.765, + "args": { + "External id": 976402,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 15446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345937109508.516, "dur": 39.236, + "args": { + "External id": 976403,"Sequence number": 10552456, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345937109510.071, "dur": 37.383, + "args": { + "External id": 976404,"Sequence number": 10552456, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937109512.915, "dur": 7.829, + "args": { + "External id": 976405,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 15449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937109515.447, "dur": 4.568, + "args": { + "External id": 976406,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937109524.043, "dur": 22.800, + "args": { + "External id": 976407,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 15451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937109578.836, "dur": 4.370, + "args": { + "External id": 976408,"Sequence number": 10552456, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 15452 + } + }, + { + "ph": "s", "id": 11, "pid": 2338706, "tid": 2338706, "ts": 6345937109578.836, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937109585.749, "dur": 3.914, + "args": { + "External id": 976409,"Sequence number": 10552457, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345937109628.063, "dur": 45335.356, + "args": { + "External id": 976410,"Sequence number": 10552457, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 15454 + } + }, + { + "ph": "s", "id": 10, "pid": 2338706, "tid": 2338706, "ts": 6345937109628.063, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345937109647.325, "dur": 29.877, + "args": { + "External id": 976411,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345937109647.930, "dur": 28.894, + "args": { + "External id": 976412,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937109649.275, "dur": 7.407, + "args": { + "External id": 976413,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937109653.059, "dur": 3.126, + "args": { + "External id": 976414,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937109657.466, "dur": 18.723, + "args": { + "External id": 976415,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 15459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937109696.375, "dur": 28.372, + "args": { + "External id": 976416,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937109697.764, "dur": 6.458, + "args": { + "External id": 976417,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937109699.628, "dur": 4.218, + "args": { + "External id": 976418,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345937109705.747, "dur": 18.730, + "args": { + "External id": 976419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937109707.833, "dur": 16.146, + "args": { + "External id": 976420,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937109728.746, "dur": 22.886, + "args": { + "External id": 976421,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937109729.323, "dur": 3.669, + "args": { + "External id": 976422,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937109730.534, "dur": 2.171, + "args": { + "External id": 976423,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345937109735.943, "dur": 15.453, + "args": { + "External id": 976424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937109736.499, "dur": 14.467, + "args": { + "External id": 976425,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 15469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2338706, + "ts": 6345937109758.686, "dur": 21.855, + "args": { + "External id": 976426,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 15470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937109760.340, "dur": 2.817, + "args": { + "External id": 976427,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345937109764.140, "dur": 16.034, + "args": { + "External id": 976428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 15472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937109766.473, "dur": 13.267, + "args": { + "External id": 976429,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2338706, + "ts": 6345937109786.095, "dur": 23.812, + "args": { + "External id": 976430,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937109813.154, "dur": 57.509, + "args": { + "External id": 976431,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937109817.998, "dur": 52.146, + "args": { + "External id": 976432,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937109824.143, "dur": 1.124, + "args": { + "External id": 976433,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345937109826.729, "dur": 27.417, + "args": { + "External id": 976434,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345937109828.667, "dur": 25.175, + "args": { + "External id": 976435,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 15479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937109832.718, "dur": 4.750, + "args": { + "External id": 976436,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937109838.523, "dur": 14.900, + "args": { + "External id": 976437,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 15481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2338706, + "ts": 6345937109874.918, "dur": 38567.451, + "args": { + "External id": 976438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2338706, + "ts": 6345937109876.533, "dur": 38563.746, + "args": { + "External id": 976439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937148459.938, "dur": 11.022, + "args": { + "External id": 976440,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937148466.372, "dur": 1.354, + "args": { + "External id": 976441,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937148481.003, "dur": 125.149, + "args": { + "External id": 976442,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937148482.639, "dur": 9.189, + "args": { + "External id": 976443,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937148486.289, "dur": 4.665, + "args": { + "External id": 976444,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937148488.489, "dur": 0.993, + "args": { + "External id": 976445,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937148493.635, "dur": 111.904, + "args": { + "External id": 976446,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937148495.874, "dur": 108.726, + "args": { + "External id": 976447,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937148611.324, "dur": 5.927, + "args": { + "External id": 976448,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937148613.962, "dur": 1.896, + "args": { + "External id": 976449,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937148628.424, "dur": 3.654, + "args": { + "External id": 976450,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937148644.275, "dur": 8.729, + "args": { + "External id": 976451,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937148647.954, "dur": 4.696, + "args": { + "External id": 976452,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937148821.116, "dur": 301.939, + "args": { + "External id": 976453,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937148825.936, "dur": 2.574, + "args": { + "External id": 976454,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937148833.846, "dur": 288.598, + "args": { + "External id": 976455,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937148836.122, "dur": 1.509, + "args": { + "External id": 976456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937148840.811, "dur": 33.486, + "args": { + "External id": 976457,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937148876.283, "dur": 3.959, + "args": { + "External id": 976458,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937148878.906, "dur": 1.024, + "args": { + "External id": 976459,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937148881.552, "dur": 29.427, + "args": { + "External id": 976460,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937148882.723, "dur": 2.869, + "args": { + "External id": 976461,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937148887.228, "dur": 23.357, + "args": { + "External id": 976462,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937148891.781, "dur": 3.424, + "args": { + "External id": 976463,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937148914.923, "dur": 25.517, + "args": { + "External id": 976464,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937148942.529, "dur": 17.903, + "args": { + "External id": 976465,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937148963.871, "dur": 17.788, + "args": { + "External id": 976466,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937148983.646, "dur": 15.619, + "args": { + "External id": 976467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937149001.583, "dur": 46.315, + "args": { + "External id": 976468,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937149004.346, "dur": 1.654, + "args": { + "External id": 976469,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937149025.551, "dur": 0.800, + "args": { + "External id": 976470,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937149085.291, "dur": 19.559, + "args": { + "External id": 976471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937149107.252, "dur": 13.482, + "args": { + "External id": 976472,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937149133.130, "dur": 3.097, + "args": { + "External id": 976473,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937149144.998, "dur": 6.302, + "args": { + "External id": 976474,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937149149.046, "dur": 0.737, + "args": { + "External id": 976475,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937149238.193, "dur": 81.980, + "args": { + "External id": 976476,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937149327.078, "dur": 10.222, + "args": { + "External id": 976477,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937149330.254, "dur": 4.245, + "args": { + "External id": 976478,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937149341.391, "dur": 35.628, + "args": { + "External id": 976479,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937149383.442, "dur": 9.524, + "args": { + "External id": 976480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937149385.615, "dur": 6.481, + "args": { + "External id": 976481,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937149389.089, "dur": 2.620, + "args": { + "External id": 976482,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937149396.637, "dur": 53.616, + "args": { + "External id": 976483,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937149397.912, "dur": 51.355, + "args": { + "External id": 976484,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937149454.770, "dur": 19.884, + "args": { + "External id": 976485,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937149484.212, "dur": 5.702, + "args": { + "External id": 976486,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937149488.034, "dur": 0.829, + "args": { + "External id": 976487,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937149495.118, "dur": 54.587, + "args": { + "External id": 976488,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937149496.277, "dur": 3.773, + "args": { + "External id": 976489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937149497.171, "dur": 2.190, + "args": { + "External id": 976490,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937149498.454, "dur": 0.749, + "args": { + "External id": 976491,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937149503.154, "dur": 46.108, + "args": { + "External id": 976492,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937149503.977, "dur": 44.502, + "args": { + "External id": 976493,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937149553.954, "dur": 3.899, + "args": { + "External id": 976494,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937149556.001, "dur": 0.613, + "args": { + "External id": 976495,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937149564.768, "dur": 1.536, + "args": { + "External id": 976496,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937149575.360, "dur": 9.955, + "args": { + "External id": 976497,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937149580.181, "dur": 4.807, + "args": { + "External id": 976498,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937149694.264, "dur": 214.471, + "args": { + "External id": 976499,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937149698.171, "dur": 5.306, + "args": { + "External id": 976500,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937149707.164, "dur": 200.845, + "args": { + "External id": 976501,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937149709.653, "dur": 0.622, + "args": { + "External id": 976502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937149712.049, "dur": 24.599, + "args": { + "External id": 976503,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937149738.215, "dur": 4.577, + "args": { + "External id": 976504,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937149740.255, "dur": 2.173, + "args": { + "External id": 976505,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937149746.249, "dur": 23.647, + "args": { + "External id": 976506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937149747.338, "dur": 1.498, + "args": { + "External id": 976507,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937149750.252, "dur": 19.237, + "args": { + "External id": 976508,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937149753.123, "dur": 2.541, + "args": { + "External id": 976509,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937149771.742, "dur": 24.283, + "args": { + "External id": 976510,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937149797.795, "dur": 15.564, + "args": { + "External id": 976511,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937149816.214, "dur": 16.235, + "args": { + "External id": 976512,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937149834.136, "dur": 15.169, + "args": { + "External id": 976513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937149851.292, "dur": 27.554, + "args": { + "External id": 976514,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937149853.490, "dur": 2.640, + "args": { + "External id": 976515,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937149860.587, "dur": 2.751, + "args": { + "External id": 976516,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937149880.575, "dur": 13.557, + "args": { + "External id": 976517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937149895.435, "dur": 11.551, + "args": { + "External id": 976518,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937149916.109, "dur": 1.775, + "args": { + "External id": 976519,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937149928.255, "dur": 4.354, + "args": { + "External id": 976520,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937149930.703, "dur": 0.689, + "args": { + "External id": 976521,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937150005.734, "dur": 125.456, + "args": { + "External id": 976522,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937150140.195, "dur": 9.304, + "args": { + "External id": 976523,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937150146.240, "dur": 1.159, + "args": { + "External id": 976524,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937150151.066, "dur": 33.077, + "args": { + "External id": 976525,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937150190.739, "dur": 9.035, + "args": { + "External id": 976526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937150192.781, "dur": 6.248, + "args": { + "External id": 976527,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937150194.862, "dur": 3.877, + "args": { + "External id": 976528,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937150203.555, "dur": 54.528, + "args": { + "External id": 976529,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937150207.430, "dur": 50.024, + "args": { + "External id": 976530,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937150263.223, "dur": 19.687, + "args": { + "External id": 976531,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937150289.170, "dur": 4.066, + "args": { + "External id": 976532,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937150291.609, "dur": 0.722, + "args": { + "External id": 976533,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937150297.915, "dur": 56.662, + "args": { + "External id": 976534,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937150298.904, "dur": 6.466, + "args": { + "External id": 976535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937150299.861, "dur": 4.795, + "args": { + "External id": 976536,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937150303.650, "dur": 0.819, + "args": { + "External id": 976537,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937150306.395, "dur": 47.785, + "args": { + "External id": 976538,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937150307.340, "dur": 46.201, + "args": { + "External id": 976539,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937150359.716, "dur": 3.915, + "args": { + "External id": 976540,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937150361.781, "dur": 0.630, + "args": { + "External id": 976541,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937150383.124, "dur": 2.955, + "args": { + "External id": 976542,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937150396.030, "dur": 11.100, + "args": { + "External id": 976543,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937150400.523, "dur": 6.279, + "args": { + "External id": 976544,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937150512.425, "dur": 257.860, + "args": { + "External id": 976545,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937150515.237, "dur": 2.353, + "args": { + "External id": 976546,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937150518.944, "dur": 250.792, + "args": { + "External id": 976547,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937150520.428, "dur": 0.476, + "args": { + "External id": 976548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937150522.327, "dur": 30.508, + "args": { + "External id": 976549,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937150554.809, "dur": 3.288, + "args": { + "External id": 976550,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937150557.159, "dur": 0.621, + "args": { + "External id": 976551,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937150561.699, "dur": 29.879, + "args": { + "External id": 976552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937150563.017, "dur": 1.687, + "args": { + "External id": 976553,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937150565.983, "dur": 25.102, + "args": { + "External id": 976554,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937150571.378, "dur": 3.222, + "args": { + "External id": 976555,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937150593.519, "dur": 36.412, + "args": { + "External id": 976556,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937150631.644, "dur": 41.929, + "args": { + "External id": 976557,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937150678.475, "dur": 15.839, + "args": { + "External id": 976558,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937150696.004, "dur": 19.157, + "args": { + "External id": 976559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937150717.211, "dur": 24.906, + "args": { + "External id": 976560,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937150722.139, "dur": 1.794, + "args": { + "External id": 976561,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937150725.659, "dur": 0.692, + "args": { + "External id": 976562,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937150743.694, "dur": 12.499, + "args": { + "External id": 976563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937150757.605, "dur": 10.946, + "args": { + "External id": 976564,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937150777.180, "dur": 1.784, + "args": { + "External id": 976565,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937150788.071, "dur": 4.066, + "args": { + "External id": 976566,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937150790.559, "dur": 0.534, + "args": { + "External id": 976567,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937150867.285, "dur": 56.647, + "args": { + "External id": 976568,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937150929.445, "dur": 5.134, + "args": { + "External id": 976569,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937150932.565, "dur": 0.751, + "args": { + "External id": 976570,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937150936.210, "dur": 25.424, + "args": { + "External id": 976571,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937150966.893, "dur": 8.419, + "args": { + "External id": 976572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937150968.607, "dur": 6.039, + "args": { + "External id": 976573,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937150973.008, "dur": 1.416, + "args": { + "External id": 976574,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937150978.361, "dur": 108.810, + "args": { + "External id": 976575,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937150979.765, "dur": 70.548, + "args": { + "External id": 976576,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937151096.618, "dur": 21.945, + "args": { + "External id": 976577,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937151126.628, "dur": 5.590, + "args": { + "External id": 976578,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937151129.896, "dur": 0.857, + "args": { + "External id": 976579,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937151137.181, "dur": 58.944, + "args": { + "External id": 976580,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937151140.926, "dur": 4.385, + "args": { + "External id": 976581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937151141.892, "dur": 2.705, + "args": { + "External id": 976582,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937151143.519, "dur": 0.877, + "args": { + "External id": 976583,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937151146.045, "dur": 49.701, + "args": { + "External id": 976584,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937151146.950, "dur": 48.070, + "args": { + "External id": 976585,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937151200.592, "dur": 11.027, + "args": { + "External id": 976586,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937151208.410, "dur": 1.809, + "args": { + "External id": 976587,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937151222.361, "dur": 2.011, + "args": { + "External id": 976588,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937151233.992, "dur": 7.248, + "args": { + "External id": 976589,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937151236.373, "dur": 4.560, + "args": { + "External id": 976590,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937151345.887, "dur": 220.692, + "args": { + "External id": 976591,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937151348.297, "dur": 2.355, + "args": { + "External id": 976592,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937151354.956, "dur": 211.106, + "args": { + "External id": 976593,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937151356.416, "dur": 0.425, + "args": { + "External id": 976594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937151357.993, "dur": 28.662, + "args": { + "External id": 976595,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937151388.443, "dur": 5.961, + "args": { + "External id": 976596,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937151390.708, "dur": 3.370, + "args": { + "External id": 976597,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937151395.517, "dur": 27.491, + "args": { + "External id": 976598,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937151396.833, "dur": 1.658, + "args": { + "External id": 976599,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937151400.100, "dur": 22.546, + "args": { + "External id": 976600,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937151405.131, "dur": 2.988, + "args": { + "External id": 976601,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937151424.673, "dur": 24.667, + "args": { + "External id": 976602,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937151451.160, "dur": 15.774, + "args": { + "External id": 976603,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937151472.605, "dur": 17.764, + "args": { + "External id": 976604,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937151491.922, "dur": 14.352, + "args": { + "External id": 976605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937151508.163, "dur": 23.937, + "args": { + "External id": 976606,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937151510.592, "dur": 1.942, + "args": { + "External id": 976607,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937151514.374, "dur": 0.790, + "args": { + "External id": 976608,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937151533.368, "dur": 14.298, + "args": { + "External id": 976609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937151551.406, "dur": 13.356, + "args": { + "External id": 976610,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937151574.430, "dur": 1.545, + "args": { + "External id": 976611,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937151586.227, "dur": 3.622, + "args": { + "External id": 976612,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937151588.400, "dur": 0.435, + "args": { + "External id": 976613,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937151663.981, "dur": 58.878, + "args": { + "External id": 976614,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937151728.389, "dur": 5.499, + "args": { + "External id": 976615,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937151731.574, "dur": 1.114, + "args": { + "External id": 976616,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937151735.370, "dur": 29.678, + "args": { + "External id": 976617,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937151769.749, "dur": 7.270, + "args": { + "External id": 976618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937151773.490, "dur": 2.791, + "args": { + "External id": 976619,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937151775.067, "dur": 0.942, + "args": { + "External id": 976620,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937151780.128, "dur": 45.391, + "args": { + "External id": 976621,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937151781.222, "dur": 43.615, + "args": { + "External id": 976622,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937151830.053, "dur": 16.328, + "args": { + "External id": 976623,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937151852.161, "dur": 8.831, + "args": { + "External id": 976624,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937151856.760, "dur": 3.298, + "args": { + "External id": 976625,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937151865.161, "dur": 48.697, + "args": { + "External id": 976626,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937151866.204, "dur": 4.896, + "args": { + "External id": 976627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937151867.021, "dur": 3.342, + "args": { + "External id": 976628,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937151869.779, "dur": 0.443, + "args": { + "External id": 976629,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937151871.848, "dur": 41.594, + "args": { + "External id": 976630,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937151872.515, "dur": 40.109, + "args": { + "External id": 976631,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937151918.564, "dur": 4.204, + "args": { + "External id": 976632,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937151920.713, "dur": 0.859, + "args": { + "External id": 976633,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937151930.688, "dur": 1.636, + "args": { + "External id": 976634,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937151940.082, "dur": 6.160, + "args": { + "External id": 976635,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937151941.902, "dur": 3.995, + "args": { + "External id": 976636,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937152097.080, "dur": 226.529, + "args": { + "External id": 976637,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937152101.544, "dur": 4.378, + "args": { + "External id": 976638,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937152110.164, "dur": 213.048, + "args": { + "External id": 976639,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937152113.409, "dur": 0.431, + "args": { + "External id": 976640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937152115.081, "dur": 26.722, + "args": { + "External id": 976641,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937152143.417, "dur": 4.123, + "args": { + "External id": 976642,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937152145.610, "dur": 1.414, + "args": { + "External id": 976643,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937152148.580, "dur": 26.776, + "args": { + "External id": 976644,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937152151.049, "dur": 4.096, + "args": { + "External id": 976645,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937152156.375, "dur": 18.595, + "args": { + "External id": 976646,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937152159.660, "dur": 2.760, + "args": { + "External id": 976647,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937152179.625, "dur": 24.513, + "args": { + "External id": 976648,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937152205.800, "dur": 22.809, + "args": { + "External id": 976649,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937152231.834, "dur": 14.470, + "args": { + "External id": 976650,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937152247.806, "dur": 16.117, + "args": { + "External id": 976651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937152265.914, "dur": 22.838, + "args": { + "External id": 976652,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937152268.423, "dur": 1.560, + "args": { + "External id": 976653,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937152271.841, "dur": 0.712, + "args": { + "External id": 976654,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937152292.724, "dur": 15.204, + "args": { + "External id": 976655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937152309.289, "dur": 12.781, + "args": { + "External id": 976656,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937152331.612, "dur": 2.245, + "args": { + "External id": 976657,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937152344.485, "dur": 4.606, + "args": { + "External id": 976658,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937152347.249, "dur": 0.688, + "args": { + "External id": 976659,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937152427.934, "dur": 63.977, + "args": { + "External id": 976660,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937152497.468, "dur": 6.705, + "args": { + "External id": 976661,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937152500.459, "dur": 2.198, + "args": { + "External id": 976662,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937152507.950, "dur": 30.169, + "args": { + "External id": 976663,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937152543.643, "dur": 6.184, + "args": { + "External id": 976664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937152545.993, "dur": 3.135, + "args": { + "External id": 976665,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937152547.886, "dur": 0.994, + "args": { + "External id": 976666,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937152552.690, "dur": 46.641, + "args": { + "External id": 976667,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937152554.117, "dur": 44.493, + "args": { + "External id": 976668,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937152603.744, "dur": 17.420, + "args": { + "External id": 976669,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937152630.172, "dur": 4.028, + "args": { + "External id": 976670,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937152632.631, "dur": 0.500, + "args": { + "External id": 976671,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937152638.584, "dur": 53.756, + "args": { + "External id": 976672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937152639.944, "dur": 5.957, + "args": { + "External id": 976673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937152640.904, "dur": 4.350, + "args": { + "External id": 976674,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937152642.301, "dur": 2.830, + "args": { + "External id": 976675,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937152649.160, "dur": 42.790, + "args": { + "External id": 976676,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937152649.964, "dur": 41.154, + "args": { + "External id": 976677,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937152696.864, "dur": 3.951, + "args": { + "External id": 976678,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937152699.013, "dur": 0.580, + "args": { + "External id": 976679,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937152706.726, "dur": 1.462, + "args": { + "External id": 976680,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937152716.556, "dur": 8.845, + "args": { + "External id": 976681,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937152720.855, "dur": 4.221, + "args": { + "External id": 976682,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937152813.306, "dur": 185.923, + "args": { + "External id": 976683,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937152815.369, "dur": 3.395, + "args": { + "External id": 976684,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937152820.395, "dur": 178.283, + "args": { + "External id": 976685,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937152821.424, "dur": 0.376, + "args": { + "External id": 976686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937152822.811, "dur": 20.434, + "args": { + "External id": 976687,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937152844.673, "dur": 4.727, + "args": { + "External id": 976688,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937152848.415, "dur": 0.754, + "args": { + "External id": 976689,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937152852.618, "dur": 22.583, + "args": { + "External id": 976690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937152854.016, "dur": 1.924, + "args": { + "External id": 976691,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937152857.476, "dur": 17.393, + "args": { + "External id": 976692,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937152860.180, "dur": 2.223, + "args": { + "External id": 976693,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937152876.960, "dur": 21.569, + "args": { + "External id": 976694,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937152900.106, "dur": 13.574, + "args": { + "External id": 976695,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937152916.231, "dur": 14.361, + "args": { + "External id": 976696,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937152932.020, "dur": 11.998, + "args": { + "External id": 976697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937152945.805, "dur": 25.098, + "args": { + "External id": 976698,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937152949.511, "dur": 1.777, + "args": { + "External id": 976699,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937152956.029, "dur": 0.604, + "args": { + "External id": 976700,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937152972.381, "dur": 12.242, + "args": { + "External id": 976701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937152986.076, "dur": 11.544, + "args": { + "External id": 976702,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937153005.896, "dur": 19.645, + "args": { + "External id": 976703,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937153039.127, "dur": 5.428, + "args": { + "External id": 976704,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937153042.259, "dur": 1.134, + "args": { + "External id": 976705,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937153162.534, "dur": 67.784, + "args": { + "External id": 976706,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937153236.512, "dur": 8.448, + "args": { + "External id": 976707,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937153242.016, "dur": 1.300, + "args": { + "External id": 976708,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937153246.708, "dur": 30.612, + "args": { + "External id": 976709,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937153283.162, "dur": 7.013, + "args": { + "External id": 976710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937153285.131, "dur": 4.235, + "args": { + "External id": 976711,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937153287.244, "dur": 1.851, + "args": { + "External id": 976712,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937153293.407, "dur": 49.201, + "args": { + "External id": 976713,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937153297.016, "dur": 44.694, + "args": { + "External id": 976714,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937153347.690, "dur": 16.520, + "args": { + "External id": 976715,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937153370.265, "dur": 4.022, + "args": { + "External id": 976716,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937153372.541, "dur": 0.643, + "args": { + "External id": 976717,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937153378.720, "dur": 54.488, + "args": { + "External id": 976718,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937153379.815, "dur": 6.537, + "args": { + "External id": 976719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937153380.981, "dur": 4.704, + "args": { + "External id": 976720,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937153384.619, "dur": 0.884, + "args": { + "External id": 976721,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937153387.069, "dur": 45.771, + "args": { + "External id": 976722,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937153387.946, "dur": 44.010, + "args": { + "External id": 976723,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937153437.905, "dur": 3.815, + "args": { + "External id": 976724,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937153440.057, "dur": 0.461, + "args": { + "External id": 976725,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937153448.259, "dur": 1.682, + "args": { + "External id": 976726,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937153458.933, "dur": 9.137, + "args": { + "External id": 976727,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937153463.164, "dur": 4.561, + "args": { + "External id": 976728,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937153558.945, "dur": 200.369, + "args": { + "External id": 976729,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937153563.020, "dur": 2.558, + "args": { + "External id": 976730,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937153567.035, "dur": 191.735, + "args": { + "External id": 976731,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937153568.463, "dur": 0.554, + "args": { + "External id": 976732,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937153570.270, "dur": 22.150, + "args": { + "External id": 976733,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937153594.081, "dur": 4.871, + "args": { + "External id": 976734,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937153597.480, "dur": 1.182, + "args": { + "External id": 976735,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937153602.351, "dur": 23.365, + "args": { + "External id": 976736,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937153603.584, "dur": 1.981, + "args": { + "External id": 976737,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937153606.961, "dur": 18.319, + "args": { + "External id": 976738,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937153609.968, "dur": 2.643, + "args": { + "External id": 976739,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937153627.283, "dur": 25.051, + "args": { + "External id": 976740,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937153653.962, "dur": 16.933, + "args": { + "External id": 976741,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937153674.020, "dur": 15.185, + "args": { + "External id": 976742,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937153690.638, "dur": 13.245, + "args": { + "External id": 976743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937153706.295, "dur": 22.548, + "args": { + "External id": 976744,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937153710.899, "dur": 1.537, + "args": { + "External id": 976745,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937153714.670, "dur": 0.849, + "args": { + "External id": 976746,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937153730.498, "dur": 13.031, + "args": { + "External id": 976747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937153744.737, "dur": 12.584, + "args": { + "External id": 976748,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937153766.449, "dur": 1.718, + "args": { + "External id": 976749,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937153777.083, "dur": 3.934, + "args": { + "External id": 976750,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937153779.338, "dur": 0.548, + "args": { + "External id": 976751,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937153856.410, "dur": 53.532, + "args": { + "External id": 976752,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937153915.348, "dur": 4.736, + "args": { + "External id": 976753,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937153918.144, "dur": 0.857, + "args": { + "External id": 976754,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937153921.619, "dur": 23.936, + "args": { + "External id": 976755,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937153950.359, "dur": 7.895, + "args": { + "External id": 976756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937153951.854, "dur": 5.687, + "args": { + "External id": 976757,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937153955.743, "dur": 1.576, + "args": { + "External id": 976758,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937153960.956, "dur": 65.199, + "args": { + "External id": 976759,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937153962.085, "dur": 44.599, + "args": { + "External id": 976760,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937154032.629, "dur": 59.101, + "args": { + "External id": 976761,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937154102.458, "dur": 5.900, + "args": { + "External id": 976762,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937154105.784, "dur": 1.118, + "args": { + "External id": 976763,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937154113.588, "dur": 63.392, + "args": { + "External id": 976764,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937154117.042, "dur": 4.064, + "args": { + "External id": 976765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937154118.153, "dur": 2.296, + "args": { + "External id": 976766,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937154119.597, "dur": 0.680, + "args": { + "External id": 976767,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937154121.873, "dur": 54.702, + "args": { + "External id": 976768,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937154122.968, "dur": 52.767, + "args": { + "External id": 976769,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937154181.588, "dur": 4.698, + "args": { + "External id": 976770,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937154184.179, "dur": 0.764, + "args": { + "External id": 976771,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937154196.493, "dur": 1.885, + "args": { + "External id": 976772,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937154207.331, "dur": 8.710, + "args": { + "External id": 976773,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937154209.695, "dur": 6.023, + "args": { + "External id": 976774,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937154321.342, "dur": 241.692, + "args": { + "External id": 976775,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937154324.206, "dur": 2.183, + "args": { + "External id": 976776,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937154330.712, "dur": 231.631, + "args": { + "External id": 976777,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937154332.525, "dur": 0.677, + "args": { + "External id": 976778,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937154334.428, "dur": 29.290, + "args": { + "External id": 976779,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937154365.497, "dur": 3.120, + "args": { + "External id": 976780,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937154367.517, "dur": 0.765, + "args": { + "External id": 976781,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937154369.552, "dur": 46.457, + "args": { + "External id": 976782,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937154370.409, "dur": 1.488, + "args": { + "External id": 976783,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937154373.042, "dur": 42.510, + "args": { + "External id": 976784,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937154397.162, "dur": 3.047, + "args": { + "External id": 976785,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937154418.311, "dur": 27.358, + "args": { + "External id": 976786,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937154447.596, "dur": 15.825, + "args": { + "External id": 976787,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937154468.858, "dur": 16.562, + "args": { + "External id": 976788,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937154486.856, "dur": 14.992, + "args": { + "External id": 976789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937154503.982, "dur": 23.531, + "args": { + "External id": 976790,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937154506.181, "dur": 1.676, + "args": { + "External id": 976791,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937154510.203, "dur": 0.689, + "args": { + "External id": 976792,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937154529.047, "dur": 14.099, + "args": { + "External id": 976793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937154547.261, "dur": 13.798, + "args": { + "External id": 976794,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937154569.883, "dur": 1.814, + "args": { + "External id": 976795,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937154581.317, "dur": 3.789, + "args": { + "External id": 976796,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937154583.762, "dur": 0.454, + "args": { + "External id": 976797,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937154657.034, "dur": 57.774, + "args": { + "External id": 976798,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937154720.597, "dur": 5.562, + "args": { + "External id": 976799,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937154723.656, "dur": 1.324, + "args": { + "External id": 976800,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937154727.757, "dur": 31.156, + "args": { + "External id": 976801,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937154764.070, "dur": 8.130, + "args": { + "External id": 976802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937154768.501, "dur": 3.000, + "args": { + "External id": 976803,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937154770.330, "dur": 0.940, + "args": { + "External id": 976804,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937154775.065, "dur": 46.105, + "args": { + "External id": 976805,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937154776.247, "dur": 44.097, + "args": { + "External id": 976806,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937154825.085, "dur": 17.100, + "args": { + "External id": 976807,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937154847.064, "dur": 28.479, + "args": { + "External id": 976808,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937154849.718, "dur": 25.339, + "args": { + "External id": 976809,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937154856.515, "dur": 0.794, + "args": { + "External id": 976810,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345937154883.617, "dur": 28.043, + "args": { + "External id": 976811,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345937154885.722, "dur": 25.696, + "args": { + "External id": 976812,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 15856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937154890.465, "dur": 3.876, + "args": { + "External id": 976813,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937154895.397, "dur": 15.503, + "args": { + "External id": 976814,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2338706, + "ts": 6345937154923.211, "dur": 6.017, + "args": { + "External id": 976815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2338706, + "ts": 6345937154925.504, "dur": 3.133, + "args": { + "External id": 976816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2338706, + "ts": 6345937154930.517, "dur": 4.099, + "args": { + "External id": 976817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2338706, + "ts": 6345937154933.906, "dur": 0.575, + "args": { + "External id": 976818,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937154983.597, "dur": 45.719, + "args": { + "External id": 976819,"Sequence number": 10552458, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937155033.604, "dur": 19.776, + "args": { + "External id": 976820,"Sequence number": 10552459, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15864 + } + }, + { + "ph": "s", "id": 9, "pid": 2338706, "tid": 2338706, "ts": 6345937155033.604, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937155098.337, "dur": 9.494, + "args": { + "External id": 976821,"Sequence number": 10552460, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 15865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937155103.731, "dur": 1.780, + "args": { + "External id": 976822,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2338706, + "ts": 6345937155110.869, "dur": 11.616, + "args": { + "External id": 976823,"Sequence number": 10552460, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "3"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 15867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937155117.863, "dur": 3.048, + "args": { + "External id": 976824,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937155123.931, "dur": 2.740, + "args": { + "External id": 976825,"Sequence number": 10552460, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 15869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937155125.411, "dur": 0.595, + "args": { + "External id": 976826,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 15870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937155133.431, "dur": 6.185, + "args": { + "External id": 976827,"Sequence number": 10552460, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15871 + } + }, + { + "ph": "s", "id": 8, "pid": 2338706, "tid": 2338706, "ts": 6345937155133.431, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937155136.921, "dur": 0.790, + "args": { + "External id": 976828,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937155142.865, "dur": 4.594, + "args": { + "External id": 976829,"Sequence number": 10552461, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15873 + } + }, + { + "ph": "s", "id": 7, "pid": 2338706, "tid": 2338706, "ts": 6345937155142.865, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937155145.929, "dur": 0.607, + "args": { + "External id": 976830,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2338706, + "ts": 6345937155148.625, "dur": 5.101, + "args": { + "External id": 976831,"Sequence number": 10552462, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 15875 + } + }, + { + "ph": "s", "id": 6, "pid": 2338706, "tid": 2338706, "ts": 6345937155148.625, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937155152.096, "dur": 0.579, + "args": { + "External id": 976832,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937155162.381, "dur": 5.448, + "args": { + "External id": 976833,"Sequence number": 10552463, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 15877 + } + }, + { + "ph": "s", "id": 5, "pid": 2338706, "tid": 2338706, "ts": 6345937155162.381, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937155165.045, "dur": 1.706, + "args": { + "External id": 976834,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 15878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345937155172.731, "dur": 42.543, + "args": { + "External id": 976835,"Sequence number": 10552464, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345937155176.853, "dur": 38.103, + "args": { + "External id": 976836,"Sequence number": 10552464, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937155179.797, "dur": 8.788, + "args": { + "External id": 976837,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 15881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937155182.536, "dur": 5.307, + "args": { + "External id": 976838,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937155189.732, "dur": 24.703, + "args": { + "External id": 976839,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 15883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937155245.462, "dur": 4.873, + "args": { + "External id": 976840,"Sequence number": 10552464, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 15884 + } + }, + { + "ph": "s", "id": 4, "pid": 2338706, "tid": 2338706, "ts": 6345937155245.462, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937155253.024, "dur": 3.814, + "args": { + "External id": 976841,"Sequence number": 10552465, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345937155295.213, "dur": 45943.800, + "args": { + "External id": 976842,"Sequence number": 10552465, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 15886 + } + }, + { + "ph": "s", "id": 3, "pid": 2338706, "tid": 2338706, "ts": 6345937155295.213, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345937155312.908, "dur": 31.788, + "args": { + "External id": 976843,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345937155313.511, "dur": 30.968, + "args": { + "External id": 976844,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937155315.305, "dur": 7.534, + "args": { + "External id": 976845,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937155316.820, "dur": 5.372, + "args": { + "External id": 976846,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937155323.816, "dur": 20.195, + "args": { + "External id": 976847,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 15891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937155363.669, "dur": 31.397, + "args": { + "External id": 976848,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937155365.009, "dur": 6.732, + "args": { + "External id": 976849,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937155366.990, "dur": 4.384, + "args": { + "External id": 976850,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345937155373.197, "dur": 21.614, + "args": { + "External id": 976851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937155377.685, "dur": 16.674, + "args": { + "External id": 976852,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937155399.049, "dur": 21.577, + "args": { + "External id": 976853,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937155400.030, "dur": 4.033, + "args": { + "External id": 976854,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937155401.432, "dur": 2.353, + "args": { + "External id": 976855,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345937155404.621, "dur": 15.778, + "args": { + "External id": 976856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937155405.660, "dur": 14.345, + "args": { + "External id": 976857,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 15901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2338706, + "ts": 6345937155427.654, "dur": 23.000, + "args": { + "External id": 976858,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 15902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937155429.508, "dur": 3.596, + "args": { + "External id": 976859,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345937155433.858, "dur": 16.490, + "args": { + "External id": 976860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 15904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937155437.262, "dur": 12.632, + "args": { + "External id": 976861,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2338706, + "ts": 6345937155456.008, "dur": 28.831, + "args": { + "External id": 976862,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937155488.014, "dur": 60.977, + "args": { + "External id": 976863,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937155495.114, "dur": 53.416, + "args": { + "External id": 976864,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937155501.005, "dur": 2.324, + "args": { + "External id": 976865,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345937155504.945, "dur": 24.915, + "args": { + "External id": 976866,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345937155506.514, "dur": 23.059, + "args": { + "External id": 976867,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 15911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937155508.996, "dur": 4.676, + "args": { + "External id": 976868,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937155514.621, "dur": 14.506, + "args": { + "External id": 976869,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 15913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2338706, + "ts": 6345937155553.974, "dur": 39028.597, + "args": { + "External id": 976870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2338706, + "ts": 6345937155555.874, "dur": 39025.306, + "args": { + "External id": 976871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937194599.739, "dur": 10.277, + "args": { + "External id": 976872,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937194605.335, "dur": 1.528, + "args": { + "External id": 976873,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937194617.132, "dur": 133.407, + "args": { + "External id": 976874,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937194618.925, "dur": 8.475, + "args": { + "External id": 976875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937194622.259, "dur": 3.101, + "args": { + "External id": 976876,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937194624.126, "dur": 0.906, + "args": { + "External id": 976877,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937194631.479, "dur": 118.187, + "args": { + "External id": 976878,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937194634.493, "dur": 114.199, + "args": { + "External id": 976879,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937194756.004, "dur": 6.222, + "args": { + "External id": 976880,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937194759.492, "dur": 1.000, + "args": { + "External id": 976881,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937194773.457, "dur": 2.409, + "args": { + "External id": 976882,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937194787.611, "dur": 12.315, + "args": { + "External id": 976883,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937194793.797, "dur": 5.790, + "args": { + "External id": 976884,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937194973.925, "dur": 353.332, + "args": { + "External id": 976885,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937194979.653, "dur": 3.560, + "args": { + "External id": 976886,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937194984.831, "dur": 341.714, + "args": { + "External id": 976887,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937194986.778, "dur": 1.557, + "args": { + "External id": 976888,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937194989.942, "dur": 51.328, + "args": { + "External id": 976889,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937195045.813, "dur": 5.685, + "args": { + "External id": 976890,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937195049.815, "dur": 1.263, + "args": { + "External id": 976891,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937195084.387, "dur": 41.413, + "args": { + "External id": 976892,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937195090.482, "dur": 4.051, + "args": { + "External id": 976893,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937195096.523, "dur": 28.900, + "args": { + "External id": 976894,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937195102.154, "dur": 4.827, + "args": { + "External id": 976895,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937195128.617, "dur": 26.916, + "args": { + "External id": 976896,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937195191.853, "dur": 18.690, + "args": { + "External id": 976897,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937195215.632, "dur": 22.934, + "args": { + "External id": 976898,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937195241.301, "dur": 16.192, + "args": { + "External id": 976899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937195260.733, "dur": 31.206, + "args": { + "External id": 976900,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937195265.145, "dur": 2.748, + "args": { + "External id": 976901,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937195272.524, "dur": 0.836, + "args": { + "External id": 976902,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937195294.698, "dur": 14.696, + "args": { + "External id": 976903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937195310.911, "dur": 14.174, + "args": { + "External id": 976904,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937195338.881, "dur": 2.810, + "args": { + "External id": 976905,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937195351.385, "dur": 5.467, + "args": { + "External id": 976906,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937195355.081, "dur": 0.605, + "args": { + "External id": 976907,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937195457.537, "dur": 89.057, + "args": { + "External id": 976908,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937195556.065, "dur": 11.123, + "args": { + "External id": 976909,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937195560.591, "dur": 3.511, + "args": { + "External id": 976910,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937195568.982, "dur": 35.940, + "args": { + "External id": 976911,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937195611.968, "dur": 6.509, + "args": { + "External id": 976912,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937195613.784, "dur": 3.918, + "args": { + "External id": 976913,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937195615.914, "dur": 1.508, + "args": { + "External id": 976914,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937195625.059, "dur": 60.134, + "args": { + "External id": 976915,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937195626.666, "dur": 57.598, + "args": { + "External id": 976916,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937195690.687, "dur": 19.908, + "args": { + "External id": 976917,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937195718.629, "dur": 5.384, + "args": { + "External id": 976918,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937195722.333, "dur": 0.621, + "args": { + "External id": 976919,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937195729.076, "dur": 57.943, + "args": { + "External id": 976920,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937195730.535, "dur": 6.853, + "args": { + "External id": 976921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937195734.447, "dur": 2.259, + "args": { + "External id": 976922,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937195735.717, "dur": 0.830, + "args": { + "External id": 976923,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937195738.257, "dur": 48.351, + "args": { + "External id": 976924,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937195739.003, "dur": 46.978, + "args": { + "External id": 976925,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937195791.468, "dur": 3.993, + "args": { + "External id": 976926,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937195793.582, "dur": 0.458, + "args": { + "External id": 976927,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937195804.693, "dur": 1.682, + "args": { + "External id": 976928,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937195816.977, "dur": 8.095, + "args": { + "External id": 976929,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937195820.281, "dur": 4.432, + "args": { + "External id": 976930,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937195937.023, "dur": 301.617, + "args": { + "External id": 976931,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937195941.896, "dur": 4.306, + "args": { + "External id": 976932,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937195947.816, "dur": 290.161, + "args": { + "External id": 976933,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937195951.843, "dur": 0.648, + "args": { + "External id": 976934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937195953.804, "dur": 25.319, + "args": { + "External id": 976935,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937195980.829, "dur": 4.642, + "args": { + "External id": 976936,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937195984.262, "dur": 0.925, + "args": { + "External id": 976937,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937195986.607, "dur": 47.632, + "args": { + "External id": 976938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937195987.924, "dur": 1.409, + "args": { + "External id": 976939,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937195990.776, "dur": 42.546, + "args": { + "External id": 976940,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937195993.726, "dur": 2.345, + "args": { + "External id": 976941,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937196036.795, "dur": 68.762, + "args": { + "External id": 976942,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937196108.785, "dur": 21.207, + "args": { + "External id": 976943,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937196136.267, "dur": 20.470, + "args": { + "External id": 976944,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937196158.579, "dur": 16.221, + "args": { + "External id": 976945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937196177.752, "dur": 26.080, + "args": { + "External id": 976946,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937196180.523, "dur": 2.178, + "args": { + "External id": 976947,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937196184.991, "dur": 1.014, + "args": { + "External id": 976948,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937196205.602, "dur": 15.371, + "args": { + "External id": 976949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937196222.498, "dur": 13.943, + "args": { + "External id": 976950,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937196251.310, "dur": 2.776, + "args": { + "External id": 976951,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937196267.291, "dur": 5.669, + "args": { + "External id": 976952,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937196271.093, "dur": 0.774, + "args": { + "External id": 976953,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937196365.017, "dur": 73.474, + "args": { + "External id": 976954,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937196444.733, "dur": 5.384, + "args": { + "External id": 976955,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937196448.182, "dur": 0.603, + "args": { + "External id": 976956,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937196451.844, "dur": 32.020, + "args": { + "External id": 976957,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937196489.967, "dur": 10.777, + "args": { + "External id": 976958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937196492.087, "dur": 7.954, + "args": { + "External id": 976959,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937196495.960, "dur": 3.879, + "args": { + "External id": 976960,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937196504.034, "dur": 54.785, + "args": { + "External id": 976961,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937196505.196, "dur": 52.736, + "args": { + "External id": 976962,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937196575.527, "dur": 20.257, + "args": { + "External id": 976963,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937196602.868, "dur": 8.637, + "args": { + "External id": 976964,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937196609.562, "dur": 0.949, + "args": { + "External id": 976965,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937196618.594, "dur": 54.484, + "args": { + "External id": 976966,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937196620.032, "dur": 3.572, + "args": { + "External id": 976967,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937196620.882, "dur": 2.066, + "args": { + "External id": 976968,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937196622.195, "dur": 0.596, + "args": { + "External id": 976969,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937196624.411, "dur": 48.270, + "args": { + "External id": 976970,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937196625.087, "dur": 46.585, + "args": { + "External id": 976971,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937196678.626, "dur": 3.959, + "args": { + "External id": 976972,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937196680.632, "dur": 0.522, + "args": { + "External id": 976973,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937196692.133, "dur": 1.987, + "args": { + "External id": 976974,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937196703.574, "dur": 8.638, + "args": { + "External id": 976975,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937196705.737, "dur": 6.101, + "args": { + "External id": 976976,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937196818.985, "dur": 233.327, + "args": { + "External id": 976977,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937196821.378, "dur": 2.240, + "args": { + "External id": 976978,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937196825.062, "dur": 226.625, + "args": { + "External id": 976979,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937196829.623, "dur": 0.359, + "args": { + "External id": 976980,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937196831.251, "dur": 29.308, + "args": { + "External id": 976981,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937196862.632, "dur": 3.249, + "args": { + "External id": 976982,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937196864.915, "dur": 0.654, + "args": { + "External id": 976983,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937196866.958, "dur": 24.891, + "args": { + "External id": 976984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937196867.929, "dur": 1.455, + "args": { + "External id": 976985,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937196870.681, "dur": 20.844, + "args": { + "External id": 976986,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937196875.315, "dur": 2.358, + "args": { + "External id": 976987,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937196893.385, "dur": 24.887, + "args": { + "External id": 976988,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937196919.907, "dur": 14.537, + "args": { + "External id": 976989,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937196939.783, "dur": 15.759, + "args": { + "External id": 976990,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937196957.028, "dur": 14.498, + "args": { + "External id": 976991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937196973.443, "dur": 21.719, + "args": { + "External id": 976992,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937196975.918, "dur": 1.646, + "args": { + "External id": 976993,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937196979.539, "dur": 0.466, + "args": { + "External id": 976994,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937196996.709, "dur": 34.812, + "args": { + "External id": 976995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937197036.560, "dur": 13.270, + "args": { + "External id": 976996,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937197099.564, "dur": 2.867, + "args": { + "External id": 976997,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937197115.937, "dur": 5.564, + "args": { + "External id": 976998,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937197119.867, "dur": 0.680, + "args": { + "External id": 976999,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937197203.633, "dur": 69.455, + "args": { + "External id": 977000,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937197280.129, "dur": 5.248, + "args": { + "External id": 977001,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937197283.182, "dur": 1.088, + "args": { + "External id": 977002,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937197287.243, "dur": 31.584, + "args": { + "External id": 977003,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937197326.454, "dur": 5.564, + "args": { + "External id": 977004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937197328.287, "dur": 3.073, + "args": { + "External id": 977005,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937197330.232, "dur": 0.867, + "args": { + "External id": 977006,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937197335.142, "dur": 49.465, + "args": { + "External id": 977007,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937197336.520, "dur": 47.444, + "args": { + "External id": 977008,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937197389.202, "dur": 16.336, + "args": { + "External id": 977009,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937197412.122, "dur": 6.974, + "args": { + "External id": 977010,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937197417.235, "dur": 0.829, + "args": { + "External id": 977011,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937197423.769, "dur": 51.600, + "args": { + "External id": 977012,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937197424.778, "dur": 5.169, + "args": { + "External id": 977013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937197425.754, "dur": 3.502, + "args": { + "External id": 977014,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937197426.858, "dur": 2.164, + "args": { + "External id": 977015,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937197430.853, "dur": 44.018, + "args": { + "External id": 977016,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937197433.634, "dur": 40.548, + "args": { + "External id": 977017,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937197479.835, "dur": 4.145, + "args": { + "External id": 977018,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937197482.047, "dur": 0.596, + "args": { + "External id": 977019,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937197490.293, "dur": 1.730, + "args": { + "External id": 977020,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937197506.579, "dur": 9.534, + "args": { + "External id": 977021,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937197511.647, "dur": 4.154, + "args": { + "External id": 977022,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937197612.204, "dur": 198.173, + "args": { + "External id": 977023,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937197615.841, "dur": 2.177, + "args": { + "External id": 977024,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937197619.538, "dur": 190.270, + "args": { + "External id": 977025,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937197620.868, "dur": 0.723, + "args": { + "External id": 977026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937197622.842, "dur": 22.333, + "args": { + "External id": 977027,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937197646.815, "dur": 7.356, + "args": { + "External id": 977028,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937197650.316, "dur": 3.585, + "args": { + "External id": 977029,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937197657.569, "dur": 22.405, + "args": { + "External id": 977030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937197658.848, "dur": 1.250, + "args": { + "External id": 977031,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937197661.197, "dur": 18.465, + "args": { + "External id": 977032,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937197663.993, "dur": 2.522, + "args": { + "External id": 977033,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937197681.444, "dur": 20.656, + "args": { + "External id": 977034,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937197703.882, "dur": 16.237, + "args": { + "External id": 977035,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937197722.862, "dur": 13.896, + "args": { + "External id": 977036,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937197738.154, "dur": 12.928, + "args": { + "External id": 977037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937197753.115, "dur": 25.558, + "args": { + "External id": 977038,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937197755.673, "dur": 1.597, + "args": { + "External id": 977039,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937197761.707, "dur": 0.801, + "args": { + "External id": 977040,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937197780.386, "dur": 14.729, + "args": { + "External id": 977041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937197796.233, "dur": 12.408, + "args": { + "External id": 977042,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937197816.842, "dur": 1.997, + "args": { + "External id": 977043,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937197828.392, "dur": 3.969, + "args": { + "External id": 977044,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937197830.737, "dur": 0.621, + "args": { + "External id": 977045,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937197904.797, "dur": 56.175, + "args": { + "External id": 977046,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937197966.854, "dur": 7.094, + "args": { + "External id": 977047,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937197971.909, "dur": 0.825, + "args": { + "External id": 977048,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937197975.673, "dur": 27.224, + "args": { + "External id": 977049,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937198025.094, "dur": 8.952, + "args": { + "External id": 977050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937198027.767, "dur": 5.273, + "args": { + "External id": 977051,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937198031.038, "dur": 1.591, + "args": { + "External id": 977052,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937198038.047, "dur": 99.528, + "args": { + "External id": 977053,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937198041.656, "dur": 94.518, + "args": { + "External id": 977054,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937198144.267, "dur": 20.333, + "args": { + "External id": 977055,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937198172.830, "dur": 7.602, + "args": { + "External id": 977056,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937198175.537, "dur": 3.470, + "args": { + "External id": 977057,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937198185.014, "dur": 52.869, + "args": { + "External id": 977058,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937198186.213, "dur": 6.555, + "args": { + "External id": 977059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937198187.418, "dur": 4.695, + "args": { + "External id": 977060,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937198191.274, "dur": 0.674, + "args": { + "External id": 977061,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937198193.498, "dur": 43.916, + "args": { + "External id": 977062,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937198194.362, "dur": 42.381, + "args": { + "External id": 977063,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937198242.797, "dur": 3.776, + "args": { + "External id": 977064,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937198244.678, "dur": 0.542, + "args": { + "External id": 977065,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937198253.783, "dur": 1.756, + "args": { + "External id": 977066,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937198264.138, "dur": 11.045, + "args": { + "External id": 977067,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937198268.857, "dur": 5.996, + "args": { + "External id": 977068,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937198380.748, "dur": 203.438, + "args": { + "External id": 977069,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937198383.202, "dur": 2.390, + "args": { + "External id": 977070,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937198387.110, "dur": 196.570, + "args": { + "External id": 977071,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937198388.709, "dur": 0.468, + "args": { + "External id": 977072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937198390.770, "dur": 26.405, + "args": { + "External id": 977073,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937198418.948, "dur": 3.288, + "args": { + "External id": 977074,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937198420.976, "dur": 0.969, + "args": { + "External id": 977075,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937198426.334, "dur": 29.401, + "args": { + "External id": 977076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937198427.457, "dur": 4.047, + "args": { + "External id": 977077,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937198432.788, "dur": 22.521, + "args": { + "External id": 977078,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937198438.022, "dur": 2.989, + "args": { + "External id": 977079,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937198457.328, "dur": 24.038, + "args": { + "External id": 977080,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937198483.033, "dur": 14.928, + "args": { + "External id": 977081,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937198501.075, "dur": 15.897, + "args": { + "External id": 977082,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937198518.449, "dur": 13.100, + "args": { + "External id": 977083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937198533.592, "dur": 22.159, + "args": { + "External id": 977084,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937198537.984, "dur": 1.577, + "args": { + "External id": 977085,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937198541.431, "dur": 0.577, + "args": { + "External id": 977086,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937198557.343, "dur": 12.775, + "args": { + "External id": 977087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937198571.334, "dur": 11.134, + "args": { + "External id": 977088,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937198591.824, "dur": 2.000, + "args": { + "External id": 977089,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937198604.273, "dur": 3.890, + "args": { + "External id": 977090,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937198606.611, "dur": 0.522, + "args": { + "External id": 977091,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937198680.178, "dur": 62.085, + "args": { + "External id": 977092,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937198747.919, "dur": 15.317, + "args": { + "External id": 977093,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937198759.035, "dur": 2.683, + "args": { + "External id": 977094,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937198764.992, "dur": 31.166, + "args": { + "External id": 977095,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937198801.510, "dur": 8.201, + "args": { + "External id": 977096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937198803.215, "dur": 5.773, + "args": { + "External id": 977097,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937198807.780, "dur": 0.980, + "args": { + "External id": 977098,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937198812.801, "dur": 47.601, + "args": { + "External id": 977099,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937198813.890, "dur": 45.522, + "args": { + "External id": 977100,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937198865.178, "dur": 15.264, + "args": { + "External id": 977101,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937198886.878, "dur": 4.212, + "args": { + "External id": 977102,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937198889.494, "dur": 0.547, + "args": { + "External id": 977103,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937198897.756, "dur": 50.035, + "args": { + "External id": 977104,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937198898.570, "dur": 5.899, + "args": { + "External id": 977105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937198899.272, "dur": 4.520, + "args": { + "External id": 977106,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937198900.558, "dur": 3.064, + "args": { + "External id": 977107,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937198905.099, "dur": 42.322, + "args": { + "External id": 977108,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937198905.658, "dur": 40.936, + "args": { + "External id": 977109,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937198954.854, "dur": 3.727, + "args": { + "External id": 977110,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937198956.774, "dur": 0.520, + "args": { + "External id": 977111,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937198964.493, "dur": 1.588, + "args": { + "External id": 977112,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937198975.060, "dur": 6.528, + "args": { + "External id": 977113,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937198977.145, "dur": 4.106, + "args": { + "External id": 977114,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937199142.886, "dur": 204.726, + "args": { + "External id": 977115,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937199145.875, "dur": 5.577, + "args": { + "External id": 977116,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937199153.448, "dur": 193.651, + "args": { + "External id": 977117,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937199155.063, "dur": 0.580, + "args": { + "External id": 977118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937199157.097, "dur": 25.271, + "args": { + "External id": 977119,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937199184.068, "dur": 6.051, + "args": { + "External id": 977120,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937199188.608, "dur": 1.104, + "args": { + "External id": 977121,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937199191.334, "dur": 25.359, + "args": { + "External id": 977122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937199192.680, "dur": 1.481, + "args": { + "External id": 977123,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937199195.396, "dur": 20.880, + "args": { + "External id": 977124,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937199200.912, "dur": 2.800, + "args": { + "External id": 977125,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937199218.059, "dur": 24.566, + "args": { + "External id": 977126,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937199244.199, "dur": 14.252, + "args": { + "External id": 977127,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937199261.209, "dur": 14.254, + "args": { + "External id": 977128,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937199276.888, "dur": 13.346, + "args": { + "External id": 977129,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937199292.279, "dur": 23.444, + "args": { + "External id": 977130,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937199294.726, "dur": 1.614, + "args": { + "External id": 977131,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937199298.253, "dur": 1.791, + "args": { + "External id": 977132,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937199320.863, "dur": 12.929, + "args": { + "External id": 977133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937199335.011, "dur": 10.942, + "args": { + "External id": 977134,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937199356.660, "dur": 2.359, + "args": { + "External id": 977135,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937199370.343, "dur": 4.134, + "args": { + "External id": 977136,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937199372.822, "dur": 0.660, + "args": { + "External id": 977137,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937199447.467, "dur": 63.380, + "args": { + "External id": 977138,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937199516.820, "dur": 5.234, + "args": { + "External id": 977139,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937199520.116, "dur": 0.687, + "args": { + "External id": 977140,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937199523.818, "dur": 26.884, + "args": { + "External id": 977141,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937199558.377, "dur": 6.551, + "args": { + "External id": 977142,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937199560.477, "dur": 3.703, + "args": { + "External id": 977143,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937199562.349, "dur": 1.619, + "args": { + "External id": 977144,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937199568.409, "dur": 45.832, + "args": { + "External id": 977145,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937199569.539, "dur": 43.833, + "args": { + "External id": 977146,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937199619.154, "dur": 16.060, + "args": { + "External id": 977147,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937199641.700, "dur": 6.576, + "args": { + "External id": 977148,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937199646.465, "dur": 0.740, + "args": { + "External id": 977149,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937199653.103, "dur": 53.591, + "args": { + "External id": 977150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937199654.384, "dur": 3.611, + "args": { + "External id": 977151,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937199655.152, "dur": 2.222, + "args": { + "External id": 977152,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937199656.452, "dur": 0.722, + "args": { + "External id": 977153,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937199658.839, "dur": 47.494, + "args": { + "External id": 977154,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937199661.912, "dur": 43.656, + "args": { + "External id": 977155,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937199710.894, "dur": 4.047, + "args": { + "External id": 977156,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937199712.830, "dur": 0.751, + "args": { + "External id": 977157,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937199720.806, "dur": 1.635, + "args": { + "External id": 977158,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937199730.970, "dur": 7.033, + "args": { + "External id": 977159,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937199732.779, "dur": 4.824, + "args": { + "External id": 977160,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937199829.508, "dur": 208.543, + "args": { + "External id": 977161,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937199833.666, "dur": 2.170, + "args": { + "External id": 977162,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937199837.323, "dur": 199.936, + "args": { + "External id": 977163,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937199838.545, "dur": 0.407, + "args": { + "External id": 977164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937199840.097, "dur": 20.347, + "args": { + "External id": 977165,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937199862.098, "dur": 4.456, + "args": { + "External id": 977166,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937199865.653, "dur": 0.624, + "args": { + "External id": 977167,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937199867.507, "dur": 26.450, + "args": { + "External id": 977168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937199868.590, "dur": 1.435, + "args": { + "External id": 977169,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937199871.129, "dur": 22.469, + "args": { + "External id": 977170,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937199875.966, "dur": 2.308, + "args": { + "External id": 977171,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937199895.208, "dur": 22.270, + "args": { + "External id": 977172,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937199919.315, "dur": 14.559, + "args": { + "External id": 977173,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937199937.023, "dur": 13.885, + "args": { + "External id": 977174,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937199952.284, "dur": 12.603, + "args": { + "External id": 977175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937199966.722, "dur": 20.086, + "args": { + "External id": 977176,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937199969.007, "dur": 1.575, + "args": { + "External id": 977177,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937199972.350, "dur": 0.759, + "args": { + "External id": 977178,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937199990.546, "dur": 12.476, + "args": { + "External id": 977179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937200003.929, "dur": 31.409, + "args": { + "External id": 977180,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937200047.302, "dur": 2.593, + "args": { + "External id": 977181,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937200098.805, "dur": 6.717, + "args": { + "External id": 977182,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937200103.202, "dur": 0.843, + "args": { + "External id": 977183,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937200185.335, "dur": 66.123, + "args": { + "External id": 977184,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937200256.955, "dur": 7.793, + "args": { + "External id": 977185,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937200262.568, "dur": 0.955, + "args": { + "External id": 977186,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937200266.458, "dur": 31.436, + "args": { + "External id": 977187,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937200302.695, "dur": 5.841, + "args": { + "External id": 977188,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937200304.540, "dur": 3.315, + "args": { + "External id": 977189,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937200306.399, "dur": 1.242, + "args": { + "External id": 977190,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937200311.299, "dur": 44.971, + "args": { + "External id": 977191,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937200312.317, "dur": 43.321, + "args": { + "External id": 977192,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937200363.631, "dur": 22.899, + "args": { + "External id": 977193,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937200393.283, "dur": 4.118, + "args": { + "External id": 977194,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937200395.536, "dur": 0.768, + "args": { + "External id": 977195,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345937200401.961, "dur": 51.888, + "args": { + "External id": 977196,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937200402.953, "dur": 6.082, + "args": { + "External id": 977197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937200403.610, "dur": 4.797, + "args": { + "External id": 977198,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937200407.489, "dur": 0.753, + "args": { + "External id": 977199,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937200409.788, "dur": 43.628, + "args": { + "External id": 977200,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937200410.393, "dur": 42.171, + "args": { + "External id": 977201,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937200458.076, "dur": 3.963, + "args": { + "External id": 977202,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937200460.004, "dur": 0.645, + "args": { + "External id": 977203,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937200467.840, "dur": 1.726, + "args": { + "External id": 977204,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937200480.449, "dur": 7.897, + "args": { + "External id": 977205,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937200482.602, "dur": 5.375, + "args": { + "External id": 977206,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937200575.667, "dur": 198.055, + "args": { + "External id": 977207,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937200577.734, "dur": 2.184, + "args": { + "External id": 977208,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345937200581.399, "dur": 191.769, + "args": { + "External id": 977209,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345937200583.184, "dur": 0.473, + "args": { + "External id": 977210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345937200584.845, "dur": 23.550, + "args": { + "External id": 977211,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345937200612.264, "dur": 3.167, + "args": { + "External id": 977212,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937200614.274, "dur": 0.903, + "args": { + "External id": 977213,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937200616.295, "dur": 24.096, + "args": { + "External id": 977214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345937200617.776, "dur": 1.428, + "args": { + "External id": 977215,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345937200620.511, "dur": 19.516, + "args": { + "External id": 977216,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937200625.560, "dur": 2.354, + "args": { + "External id": 977217,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345937200642.025, "dur": 22.998, + "args": { + "External id": 977218,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937200666.843, "dur": 14.206, + "args": { + "External id": 977219,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345937200683.567, "dur": 20.411, + "args": { + "External id": 977220,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345937200705.777, "dur": 13.744, + "args": { + "External id": 977221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937200721.584, "dur": 24.232, + "args": { + "External id": 977222,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345937200726.567, "dur": 2.057, + "args": { + "External id": 977223,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937200730.495, "dur": 0.789, + "args": { + "External id": 977224,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345937200747.434, "dur": 12.770, + "args": { + "External id": 977225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937200761.540, "dur": 10.325, + "args": { + "External id": 977226,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345937200780.673, "dur": 1.742, + "args": { + "External id": 977227,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937200792.005, "dur": 3.906, + "args": { + "External id": 977228,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937200794.301, "dur": 0.726, + "args": { + "External id": 977229,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937200867.434, "dur": 53.263, + "args": { + "External id": 977230,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345937200925.911, "dur": 4.770, + "args": { + "External id": 977231,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937200928.595, "dur": 0.663, + "args": { + "External id": 977232,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937200931.862, "dur": 26.689, + "args": { + "External id": 977233,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345937200963.229, "dur": 8.404, + "args": { + "External id": 977234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345937200965.205, "dur": 5.687, + "args": { + "External id": 977235,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937200969.605, "dur": 1.077, + "args": { + "External id": 977236,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345937200974.572, "dur": 67.671, + "args": { + "External id": 977237,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345937200975.726, "dur": 65.053, + "args": { + "External id": 977238,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937201048.293, "dur": 58.841, + "args": { + "External id": 977239,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937201114.645, "dur": 33.130, + "args": { + "External id": 977240,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 16284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345937201117.628, "dur": 29.629, + "args": { + "External id": 977241,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937201125.952, "dur": 0.725, + "args": { + "External id": 977242,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 16286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345937201154.099, "dur": 34.266, + "args": { + "External id": 977243,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 16287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345937201156.542, "dur": 31.579, + "args": { + "External id": 977244,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 16288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937201164.050, "dur": 4.091, + "args": { + "External id": 977245,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345937201169.815, "dur": 17.701, + "args": { + "External id": 977246,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2338706, + "ts": 6345937201201.660, "dur": 6.854, + "args": { + "External id": 977247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 16291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2338706, + "ts": 6345937201204.458, "dur": 3.796, + "args": { + "External id": 977248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 16292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2338706, + "ts": 6345937201209.912, "dur": 1.380, + "args": { + "External id": 977249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2338706, + "ts": 6345937201210.434, "dur": 0.765, + "args": { + "External id": 977250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937201260.869, "dur": 30.590, + "args": { + "External id": 977251,"Sequence number": 10552466, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 16295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345937201296.692, "dur": 15.971, + "args": { + "External id": 977252,"Sequence number": 10552467, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 16296 + } + }, + { + "ph": "s", "id": 2, "pid": 2338706, "tid": 2338706, "ts": 6345937201296.692, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345937201432.154, "dur": 47.877, + "args": { + "External id": 977253,"Record function id": 0, "Ev Idx": 16297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338706, "tid": 2338706, + "ts": 6345937201593.824, "dur": 38.589, + "args": { + "External id": 977254,"Sequence number": 10552468, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 16298 + } + }, + { + "ph": "s", "id": 1, "pid": 2338706, "tid": 2338706, "ts": 6345937201593.824, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937201720.234, "dur": 32.477, + "args": { + "External id": 977255,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 16299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345937201722.149, "dur": 9.543, + "args": { + "External id": 977256,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 16300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345937201725.789, "dur": 5.091, + "args": { + "External id": 977257,"Record function id": 0, "Concrete Inputs": ["[1]", "[1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345937201733.563, "dur": 18.726, + "args": { + "External id": 977258,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 16302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338706, "tid": 2338706, + "ts": 6345939290564.203, "dur": 70.402, + "args": { + "External id": 977259,"Sequence number": 10552469, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 16303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338706, "tid": 2338706, + "ts": 6345939290645.894, "dur": 24.039, + "args": { + "External id": 977260,"Sequence number": 10552470, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 16304 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 2338706, "tid": 2338706, + "ts": 6345939290716.913, "dur": 235.657, + "args": { + "External id": 977261,"Record function id": 0, "Ev Idx": 16305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939291365.038, "dur": 18.123, + "args": { + "External id": 977262,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 16306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939291375.672, "dur": 3.100, + "args": { + "External id": 977263,"Record function id": 0, "Concrete Inputs": ["", "[8, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 16307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939291385.508, "dur": 580.049, + "args": { + "External id": 977264,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 16308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939291960.665, "dur": 2.012, + "args": { + "External id": 977265,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 16309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939291998.404, "dur": 16879.991, + "args": { + "External id": 977266,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], [], []], "Ev Idx": 16310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939292004.488, "dur": 16872.981, + "args": { + "External id": 977267,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 16311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939292036.335, "dur": 13.302, + "args": { + "External id": 977268,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939292085.783, "dur": 16789.721, + "args": { + "External id": 977269,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939292097.188, "dur": 0.635, + "args": { + "External id": 977270,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 16314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939292103.400, "dur": 8.874, + "args": { + "External id": 977271,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[8, 4096], [8, 4096]], "Ev Idx": 16315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338706, "tid": 2338706, + "ts": 6345939292107.055, "dur": 4.987, + "args": { + "External id": 977272,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[8, 4096], [], []], "Ev Idx": 16316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939292110.414, "dur": 1.306, + "args": { + "External id": 977273,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 16317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345939292114.220, "dur": 153.350, + "args": { + "External id": 977274,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 16318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345939292115.782, "dur": 151.484, + "args": { + "External id": 977275,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 16319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939292117.891, "dur": 15.183, + "args": { + "External id": 977276,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 16320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939292121.012, "dur": 11.461, + "args": { + "External id": 977277,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939292135.227, "dur": 131.626, + "args": { + "External id": 977278,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939292270.001, "dur": 16598.721, + "args": { + "External id": 977279,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939308901.568, "dur": 512.047, + "args": { + "External id": 977280,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 16324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939308905.281, "dur": 507.914, + "args": { + "External id": 977281,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], []], "Ev Idx": 16325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939308912.861, "dur": 12.839, + "args": { + "External id": 977282,"Record function id": 0, "Concrete Inputs": ["[8, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939308928.455, "dur": 482.851, + "args": { + "External id": 977283,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[8, 8192], [8, 8192], []], "Ev Idx": 16327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338706, "tid": 2338706, + "ts": 6345939309449.053, "dur": 62.150, + "args": { + "External id": 977284,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939309456.140, "dur": 5.663, + "args": { + "External id": 977285,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338706, "tid": 2338706, + "ts": 6345939309466.496, "dur": 44.142, + "args": { + "External id": 977286,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 16330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345939309472.335, "dur": 8.701, + "args": { + "External id": 977287,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 2338706, "tid": 2338706, + "ts": 6345939309525.015, "dur": 93.204, + "args": { + "External id": 977288,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338706, "tid": 2338706, + "ts": 6345939309531.410, "dur": 10.885, + "args": { + "External id": 977289,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 16333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939309538.720, "dur": 3.135, + "args": { + "External id": 977290,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939309544.047, "dur": 5.110, + "args": { + "External id": 977291,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2338706, + "ts": 6345939309551.064, "dur": 3.379, + "args": { + "External id": 977292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[8, 4096]], "Ev Idx": 16336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338706, "tid": 2338706, + "ts": 6345939309557.175, "dur": 11.298, + "args": { + "External id": 977293,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 16337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939309563.827, "dur": 4.378, + "args": { + "External id": 977294,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 16338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338706, "tid": 2338706, + "ts": 6345939309569.680, "dur": 4.949, + "args": { + "External id": 977295,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 16339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939309573.579, "dur": 0.924, + "args": { + "External id": 977296,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 16340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939309577.319, "dur": 6.609, + "args": { + "External id": 977297,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [8, 1, 1, 4096]], "Ev Idx": 16341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338706, "tid": 2338706, + "ts": 6345939309580.185, "dur": 3.558, + "args": { + "External id": 977298,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 16342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939309582.863, "dur": 0.728, + "args": { + "External id": 977299,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 16343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939309586.128, "dur": 29.978, + "args": { + "External id": 977300,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[8, 1, 1, 4096], [8, 1, 1, 4096], []], "Ev Idx": 16344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939309630.625, "dur": 32.376, + "args": { + "External id": 977301,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 16345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939309632.184, "dur": 30.611, + "args": { + "External id": 977302,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 16346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939309637.077, "dur": 3.953, + "args": { + "External id": 977303,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939309642.040, "dur": 20.330, + "args": { + "External id": 977304,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16348 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939309810.031, "dur": 193.172, + "args": { + "External id": 977305,"Record function id": 0, "Ev Idx": 16349 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 2338706, "tid": 2338706, + "ts": 6345939309917.055, "dur": 72.520, + "args": { + "External id": 977306,"Record function id": 0, "Ev Idx": 16350 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939310042.578, "dur": 114.434, + "args": { + "External id": 977307,"Record function id": 0, "Ev Idx": 16351 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939310171.041, "dur": 14200.176, + "args": { + "External id": 977308,"Record function id": 0, "Ev Idx": 16352 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 2338706, "tid": 2338706, + "ts": 6345939310182.193, "dur": 1760.513, + "args": { + "External id": 977309,"Record function id": 0, "Ev Idx": 16353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939310335.802, "dur": 13.059, + "args": { + "External id": 977310,"Record function id": 0, "Concrete Inputs": ["[141824512]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939310372.339, "dur": 150.534, + "args": { + "External id": 977311,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 16355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310380.627, "dur": 3.241, + "args": { + "External id": 977312,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310387.704, "dur": 0.582, + "args": { + "External id": 977313,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310390.023, "dur": 0.591, + "args": { + "External id": 977314,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "16384512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310392.056, "dur": 4.250, + "args": { + "External id": 977315,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "18481664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310398.159, "dur": 0.535, + "args": { + "External id": 977316,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19005952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310400.924, "dur": 0.597, + "args": { + "External id": 977317,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "19530240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310404.903, "dur": 0.358, + "args": { + "External id": 977318,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310406.960, "dur": 0.406, + "args": { + "External id": 977319,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "21627904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310409.211, "dur": 0.518, + "args": { + "External id": 977320,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "28967936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310412.109, "dur": 0.572, + "args": { + "External id": 977321,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "36307968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310414.269, "dur": 0.495, + "args": { + "External id": 977322,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310416.448, "dur": 4.111, + "args": { + "External id": 977323,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "43648512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310422.735, "dur": 0.523, + "args": { + "External id": 977324,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45745664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310425.139, "dur": 0.771, + "args": { + "External id": 977325,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46269952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310429.003, "dur": 0.334, + "args": { + "External id": 977326,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "46794240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310431.415, "dur": 0.569, + "args": { + "External id": 977327,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310433.741, "dur": 0.507, + "args": { + "External id": 977328,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "48891904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310437.327, "dur": 0.500, + "args": { + "External id": 977329,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "56231936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310439.975, "dur": 0.389, + "args": { + "External id": 977330,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "63571968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310442.586, "dur": 3.493, + "args": { + "External id": 977331,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310448.519, "dur": 0.487, + "args": { + "External id": 977332,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "70912512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310451.154, "dur": 0.378, + "args": { + "External id": 977333,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73009664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310454.793, "dur": 0.274, + "args": { + "External id": 977334,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73533952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310457.024, "dur": 0.404, + "args": { + "External id": 977335,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "74058240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310459.642, "dur": 0.423, + "args": { + "External id": 977336,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310462.761, "dur": 0.458, + "args": { + "External id": 977337,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "76155904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310465.086, "dur": 0.664, + "args": { + "External id": 977338,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "83495936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310467.422, "dur": 3.735, + "args": { + "External id": 977339,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "90835968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310473.173, "dur": 0.981, + "args": { + "External id": 977340,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310476.017, "dur": 0.438, + "args": { + "External id": 977341,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "98176512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310479.762, "dur": 0.340, + "args": { + "External id": 977342,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100273664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310482.110, "dur": 0.616, + "args": { + "External id": 977343,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100797952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310484.109, "dur": 0.781, + "args": { + "External id": 977344,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "101322240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310487.579, "dur": 0.785, + "args": { + "External id": 977345,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310489.957, "dur": 0.759, + "args": { + "External id": 977346,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "103419904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310492.796, "dur": 3.184, + "args": { + "External id": 977347,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "110759936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310497.593, "dur": 0.592, + "args": { + "External id": 977348,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "118099968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310499.749, "dur": 0.520, + "args": { + "External id": 977349,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310503.114, "dur": 0.331, + "args": { + "External id": 977350,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "125440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939310564.024, "dur": 158.283, + "args": { + "External id": 977351,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939310815.927, "dur": 479.561, + "args": { + "External id": 977352,"Record function id": 0, "Concrete Inputs": ["", "", "141824512", "8", "0", "15", ""], "Input type": ["TensorList", "", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 16396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939310835.875, "dur": 6.588, + "args": { + "External id": 977353,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939310849.227, "dur": 14.605, + "args": { + "External id": 977354,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "141824512"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 16398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939310854.718, "dur": 8.557, + "args": { + "External id": 977355,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "141824512", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[1134596096], [], [], [], []], "Ev Idx": 16399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310859.149, "dur": 0.830, + "args": { + "External id": 977356,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 16400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939310875.140, "dur": 164.701, + "args": { + "External id": 977357,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 16401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310877.942, "dur": 0.540, + "args": { + "External id": 977358,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310880.838, "dur": 2.106, + "args": { + "External id": 977359,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310884.921, "dur": 2.605, + "args": { + "External id": 977360,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "16384512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310888.729, "dur": 0.543, + "args": { + "External id": 977361,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "18481664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310892.718, "dur": 0.477, + "args": { + "External id": 977362,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19005952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310894.920, "dur": 0.307, + "args": { + "External id": 977363,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "19530240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310897.134, "dur": 0.357, + "args": { + "External id": 977364,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310900.466, "dur": 0.523, + "args": { + "External id": 977365,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "21627904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310902.502, "dur": 0.372, + "args": { + "External id": 977366,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "28967936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310904.894, "dur": 1.696, + "args": { + "External id": 977367,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "36307968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310908.487, "dur": 2.456, + "args": { + "External id": 977368,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310912.730, "dur": 0.588, + "args": { + "External id": 977369,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "43648512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310915.703, "dur": 0.285, + "args": { + "External id": 977370,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45745664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310918.187, "dur": 0.373, + "args": { + "External id": 977371,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46269952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310920.449, "dur": 0.810, + "args": { + "External id": 977372,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "46794240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310923.770, "dur": 0.373, + "args": { + "External id": 977373,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310926.004, "dur": 0.498, + "args": { + "External id": 977374,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "48891904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310928.447, "dur": 1.689, + "args": { + "External id": 977375,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "56231936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310931.918, "dur": 2.856, + "args": { + "External id": 977376,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "63571968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310936.469, "dur": 0.586, + "args": { + "External id": 977377,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310939.872, "dur": 0.383, + "args": { + "External id": 977378,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "70912512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310942.207, "dur": 0.393, + "args": { + "External id": 977379,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73009664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310944.661, "dur": 0.520, + "args": { + "External id": 977380,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73533952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310948.075, "dur": 0.634, + "args": { + "External id": 977381,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "74058240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310949.918, "dur": 0.541, + "args": { + "External id": 977382,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310951.688, "dur": 2.538, + "args": { + "External id": 977383,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "76155904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310955.568, "dur": 2.869, + "args": { + "External id": 977384,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "83495936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310959.595, "dur": 0.489, + "args": { + "External id": 977385,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "90835968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310963.283, "dur": 0.540, + "args": { + "External id": 977386,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310965.185, "dur": 0.603, + "args": { + "External id": 977387,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "98176512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310966.911, "dur": 0.618, + "args": { + "External id": 977388,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100273664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310970.728, "dur": 0.325, + "args": { + "External id": 977389,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100797952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310972.485, "dur": 0.251, + "args": { + "External id": 977390,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "101322240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310973.926, "dur": 2.753, + "args": { + "External id": 977391,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310978.034, "dur": 2.828, + "args": { + "External id": 977392,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "103419904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310982.248, "dur": 0.442, + "args": { + "External id": 977393,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "110759936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310986.376, "dur": 0.387, + "args": { + "External id": 977394,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "118099968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310988.063, "dur": 0.359, + "args": { + "External id": 977395,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939310989.720, "dur": 0.315, + "args": { + "External id": 977396,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "125440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939311141.405, "dur": 133.377, + "args": { + "External id": 977397,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939311376.702, "dur": 430.751, + "args": { + "External id": 977398,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[1134596096], [141824512], [], [], []], "Ev Idx": 16442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939311414.536, "dur": 387.214, + "args": { + "External id": 977399,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1134596096, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[141824512], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16443, "In msg nelems": 141824512 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939311427.686, "dur": 366.669, + "args": { + "External id": 977400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[141824512]], "Ev Idx": 16444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939311834.967, "dur": 2.689, + "args": { + "External id": 977401,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16445, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 2338706, "tid": 2338706, + "ts": 6345939311961.755, "dur": 12073.878, + "args": { + "External id": 977402,"Record function id": 0, "Ev Idx": 16446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312326.195, "dur": 8.844, + "args": { + "External id": 977403,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 16447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312340.246, "dur": 1.429, + "args": { + "External id": 977404,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 16448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312344.355, "dur": 1.389, + "args": { + "External id": 977405,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312350.069, "dur": 3.926, + "args": { + "External id": 977406,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312356.060, "dur": 1.175, + "args": { + "External id": 977407,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312359.041, "dur": 1.058, + "args": { + "External id": 977408,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312413.759, "dur": 1.895, + "args": { + "External id": 983553,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312426.164, "dur": 2.447, + "args": { + "External id": 983554,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312430.225, "dur": 0.977, + "args": { + "External id": 983555,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312432.806, "dur": 1.046, + "args": { + "External id": 983556,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312435.852, "dur": 0.992, + "args": { + "External id": 983557,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312440.900, "dur": 3.184, + "args": { + "External id": 983558,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312445.795, "dur": 0.899, + "args": { + "External id": 983559,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312448.652, "dur": 1.046, + "args": { + "External id": 983560,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312451.430, "dur": 0.815, + "args": { + "External id": 983561,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312456.499, "dur": 2.466, + "args": { + "External id": 983562,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312460.373, "dur": 0.840, + "args": { + "External id": 983563,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312463.226, "dur": 0.966, + "args": { + "External id": 983564,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312465.999, "dur": 0.993, + "args": { + "External id": 983565,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312470.582, "dur": 3.093, + "args": { + "External id": 983566,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312475.306, "dur": 0.737, + "args": { + "External id": 983567,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312477.835, "dur": 0.913, + "args": { + "External id": 983568,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312480.600, "dur": 1.105, + "args": { + "External id": 983569,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312485.608, "dur": 2.528, + "args": { + "External id": 983570,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312489.575, "dur": 0.765, + "args": { + "External id": 983571,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312492.163, "dur": 0.886, + "args": { + "External id": 983572,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312494.926, "dur": 0.783, + "args": { + "External id": 983573,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312500.146, "dur": 2.977, + "args": { + "External id": 983574,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312504.807, "dur": 0.820, + "args": { + "External id": 983575,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312507.430, "dur": 1.161, + "args": { + "External id": 983576,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312510.350, "dur": 0.998, + "args": { + "External id": 983577,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312515.120, "dur": 2.459, + "args": { + "External id": 983578,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312523.865, "dur": 0.876, + "args": { + "External id": 983579,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312527.072, "dur": 0.664, + "args": { + "External id": 983580,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312529.496, "dur": 0.664, + "args": { + "External id": 983581,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312534.808, "dur": 3.631, + "args": { + "External id": 983582,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312539.792, "dur": 0.943, + "args": { + "External id": 983583,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312542.414, "dur": 0.986, + "args": { + "External id": 983584,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312544.859, "dur": 0.857, + "args": { + "External id": 983585,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939312549.758, "dur": 2.348, + "args": { + "External id": 983586,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 16486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939312588.607, "dur": 11343.558, + "args": { + "External id": 983587,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 16487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939312615.958, "dur": 11303.332, + "args": { + "External id": 983588,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 16488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939312643.331, "dur": 19.653, + "args": { + "External id": 983589,"Record function id": 0, "Concrete Inputs": ["[4384]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939312671.472, "dur": 11192.753, + "args": { + "External id": 983590,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], [], []], "Ev Idx": 16490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939312674.309, "dur": 11189.200, + "args": { + "External id": 983591,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], []], "Ev Idx": 16491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939312680.848, "dur": 6.755, + "args": { + "External id": 983592,"Record function id": 0, "Concrete Inputs": ["[4384]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939312689.520, "dur": 11168.703, + "args": { + "External id": 983593,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4384], [4384], []], "Ev Idx": 16493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939324478.190, "dur": 42.434, + "args": { + "External id": 983594,"Record function id": 0, "Ev Idx": 16494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 2338706, "tid": 2338706, + "ts": 6345939324522.612, "dur": 278.193, + "args": { + "External id": 983595,"Record function id": 0, "Ev Idx": 16495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939324578.694, "dur": 211.348, + "args": { + "External id": 983596,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32000, 4096], [8, 4096]], "Ev Idx": 16496 + } + }, + { + "ph": "s", "id": 448, "pid": 2338706, "tid": 2338706, "ts": 6345939324578.694, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939324674.237, "dur": 66.513, + "args": { + "External id": 983597,"kernel_hash": "cwt66mnkcya5wyvezkdac4cj5ntiugvh6bs2jikth6qdzhubcefg", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wt/cwt66mnkcya5wyvezkdac4cj5ntiugvh6bs2jikth6qdzhubcefg.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096], [32000, 4096], [8, 4096, 4096], []], "Ev Idx": 16497 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939324876.421, "dur": 69.649, + "args": { + "External id": 983598,"Record function id": 0, "Ev Idx": 16498 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 2338706, "tid": 2338706, + "ts": 6345939324960.563, "dur": 8305.957, + "args": { + "External id": 983599,"Record function id": 0, "Ev Idx": 16499 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338706, "tid": 2338706, + "ts": 6345939324970.677, "dur": 1022.536, + "args": { + "External id": 983600,"Record function id": 0, "Ev Idx": 16500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939325156.651, "dur": 16.021, + "args": { + "External id": 983601,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939325191.595, "dur": 43.710, + "args": { + "External id": 983602,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325200.483, "dur": 2.924, + "args": { + "External id": 983603,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325205.675, "dur": 0.646, + "args": { + "External id": 983604,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325207.711, "dur": 2.387, + "args": { + "External id": 983605,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325211.031, "dur": 0.565, + "args": { + "External id": 983606,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325212.747, "dur": 0.838, + "args": { + "External id": 983607,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325216.812, "dur": 0.478, + "args": { + "External id": 983608,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325218.341, "dur": 0.673, + "args": { + "External id": 983609,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325220.063, "dur": 3.659, + "args": { + "External id": 983610,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325226.875, "dur": 0.612, + "args": { + "External id": 983611,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939325249.225, "dur": 65.791, + "args": { + "External id": 983612,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939325355.781, "dur": 133.980, + "args": { + "External id": 983613,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939325370.649, "dur": 5.711, + "args": { + "External id": 983614,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939325382.347, "dur": 11.043, + "args": { + "External id": 983615,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939325387.315, "dur": 5.580, + "args": { + "External id": 983616,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325391.112, "dur": 0.524, + "args": { + "External id": 983617,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939325400.656, "dur": 27.659, + "args": { + "External id": 983618,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325402.792, "dur": 0.672, + "args": { + "External id": 983619,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325404.947, "dur": 0.545, + "args": { + "External id": 983620,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325408.644, "dur": 0.635, + "args": { + "External id": 983621,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325410.198, "dur": 0.685, + "args": { + "External id": 983622,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325412.207, "dur": 3.503, + "args": { + "External id": 983623,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325416.766, "dur": 0.312, + "args": { + "External id": 983624,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325418.388, "dur": 0.297, + "args": { + "External id": 983625,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325421.690, "dur": 0.494, + "args": { + "External id": 983626,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939325423.414, "dur": 0.349, + "args": { + "External id": 983627,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939325440.685, "dur": 37.960, + "args": { + "External id": 983628,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939325552.745, "dur": 337.464, + "args": { + "External id": 983629,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939325588.614, "dur": 296.012, + "args": { + "External id": 983630,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16530, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939325601.518, "dur": 277.679, + "args": { + "External id": 983631,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939325914.990, "dur": 2.645, + "args": { + "External id": 983632,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16532, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338706, "tid": 2338706, + "ts": 6345939326046.426, "dur": 6878.375, + "args": { + "External id": 983633,"Record function id": 0, "Ev Idx": 16533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939326228.347, "dur": 8.706, + "args": { + "External id": 983634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939326241.470, "dur": 1.432, + "args": { + "External id": 983635,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939326244.801, "dur": 1.030, + "args": { + "External id": 983636,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939326248.158, "dur": 3.768, + "args": { + "External id": 983637,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939326253.619, "dur": 1.441, + "args": { + "External id": 983638,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939326256.428, "dur": 1.140, + "args": { + "External id": 983639,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939326259.566, "dur": 1.240, + "args": { + "External id": 983640,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939326264.806, "dur": 2.855, + "args": { + "External id": 983641,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939326269.652, "dur": 1.152, + "args": { + "External id": 983642,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939326272.450, "dur": 0.911, + "args": { + "External id": 983643,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939326294.224, "dur": 6579.453, + "args": { + "External id": 983644,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939326319.654, "dur": 6544.535, + "args": { + "External id": 983645,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939326338.523, "dur": 17.563, + "args": { + "External id": 983646,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939326362.090, "dur": 6461.502, + "args": { + "External id": 983647,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939326365.084, "dur": 6456.763, + "args": { + "External id": 983648,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939326371.616, "dur": 8.428, + "args": { + "External id": 983649,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939326381.857, "dur": 6436.859, + "args": { + "External id": 983650,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939333184.475, "dur": 45.257, + "args": { + "External id": 983651,"Sequence number": 10552472, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16551 + } + }, + { + "ph": "s", "id": 447, "pid": 2338706, "tid": 2338706, "ts": 6345939333184.475, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939333211.384, "dur": 11.680, + "args": { + "External id": 983652,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939333216.437, "dur": 6.159, + "args": { + "External id": 983653,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939333325.277, "dur": 121.792, + "args": { + "External id": 983654,"Record function id": 0, "Ev Idx": 16554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939333448.663, "dur": 1409.165, + "args": { + "External id": 983655,"Record function id": 0, "Ev Idx": 16555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939333500.730, "dur": 1339.860, + "args": { + "External id": 983656,"Sequence number": 10552473, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16556 + } + }, + { + "ph": "s", "id": 446, "pid": 2338706, "tid": 2338706, "ts": 6345939333500.730, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939333596.047, "dur": 63.330, + "args": { + "External id": 983657,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939333677.400, "dur": 122.082, + "args": { + "External id": 983658,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939333815.572, "dur": 46.352, + "args": { + "External id": 983659,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939333871.998, "dur": 35.550, + "args": { + "External id": 983660,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939333939.284, "dur": 35.364, + "args": { + "External id": 983661,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939333999.385, "dur": 49.344, + "args": { + "External id": 983662,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939334125.399, "dur": 178.419, + "args": { + "External id": 983663,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939334195.301, "dur": 19.358, + "args": { + "External id": 983664,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939334201.885, "dur": 10.518, + "args": { + "External id": 983665,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939334217.968, "dur": 5.117, + "args": { + "External id": 983666,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939334224.503, "dur": 1.286, + "args": { + "External id": 983667,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939334229.239, "dur": 3.857, + "args": { + "External id": 983668,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939334319.299, "dur": 70.760, + "args": { + "External id": 983669,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939334438.134, "dur": 39.366, + "args": { + "External id": 983670,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939334486.750, "dur": 53.549, + "args": { + "External id": 983671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939334550.613, "dur": 40.651, + "args": { + "External id": 983672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939334622.523, "dur": 32.192, + "args": { + "External id": 983673,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939334663.799, "dur": 43.410, + "args": { + "External id": 983674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939334730.992, "dur": 23.713, + "args": { + "External id": 983675,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16575 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 2338706, "tid": 2338706, + "ts": 6345939334929.580, "dur": 120.937, + "args": { + "External id": 983676,"Record function id": 0, "Ev Idx": 16576 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939335191.898, "dur": 56.810, + "args": { + "External id": 983677,"Record function id": 0, "Ev Idx": 16577 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 2338706, "tid": 2338706, + "ts": 6345939335259.464, "dur": 27498.775, + "args": { + "External id": 983678,"Record function id": 0, "Ev Idx": 16578 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338706, "tid": 2338706, + "ts": 6345939335269.294, "dur": 1187.791, + "args": { + "External id": 983679,"Record function id": 0, "Ev Idx": 16579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939335369.777, "dur": 12.128, + "args": { + "External id": 983680,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939335399.276, "dur": 39.123, + "args": { + "External id": 983681,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335405.515, "dur": 4.019, + "args": { + "External id": 983682,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335412.463, "dur": 0.618, + "args": { + "External id": 983683,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335414.733, "dur": 0.649, + "args": { + "External id": 983684,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335418.161, "dur": 0.672, + "args": { + "External id": 983685,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335419.689, "dur": 2.681, + "args": { + "External id": 983686,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335423.714, "dur": 0.570, + "args": { + "External id": 983687,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335425.969, "dur": 0.527, + "args": { + "External id": 983688,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335427.760, "dur": 0.251, + "args": { + "External id": 983689,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335428.994, "dur": 1.392, + "args": { + "External id": 983690,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939335451.156, "dur": 64.610, + "args": { + "External id": 983691,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939335564.799, "dur": 135.650, + "args": { + "External id": 983692,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939335578.614, "dur": 6.292, + "args": { + "External id": 983693,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939335591.087, "dur": 11.282, + "args": { + "External id": 983694,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939335596.121, "dur": 5.738, + "args": { + "External id": 983695,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335599.716, "dur": 0.886, + "args": { + "External id": 983696,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939335610.190, "dur": 24.486, + "args": { + "External id": 983697,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335612.154, "dur": 0.536, + "args": { + "External id": 983698,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335613.971, "dur": 3.743, + "args": { + "External id": 983699,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335618.707, "dur": 0.381, + "args": { + "External id": 983700,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335620.271, "dur": 0.497, + "args": { + "External id": 983701,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335622.848, "dur": 0.286, + "args": { + "External id": 983702,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335624.076, "dur": 0.376, + "args": { + "External id": 983703,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335625.270, "dur": 0.714, + "args": { + "External id": 983704,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335628.233, "dur": 0.339, + "args": { + "External id": 983705,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939335629.407, "dur": 0.553, + "args": { + "External id": 983706,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939335648.942, "dur": 39.683, + "args": { + "External id": 983707,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939335766.204, "dur": 563.800, + "args": { + "External id": 983708,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939335802.473, "dur": 520.464, + "args": { + "External id": 983709,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16609, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939335814.282, "dur": 501.541, + "args": { + "External id": 983710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939336362.511, "dur": 2.927, + "args": { + "External id": 983711,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16611, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338706, "tid": 2338706, + "ts": 6345939336481.711, "dur": 26000.010, + "args": { + "External id": 983712,"Record function id": 0, "Ev Idx": 16612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939336609.131, "dur": 7.954, + "args": { + "External id": 983713,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939336621.239, "dur": 1.251, + "args": { + "External id": 983714,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939336624.572, "dur": 1.418, + "args": { + "External id": 983715,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939336628.059, "dur": 1.114, + "args": { + "External id": 983716,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939336630.695, "dur": 1.895, + "args": { + "External id": 983717,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939336636.356, "dur": 1.529, + "args": { + "External id": 983718,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939336639.719, "dur": 0.854, + "args": { + "External id": 983719,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939336642.127, "dur": 4.825, + "args": { + "External id": 983720,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939336648.538, "dur": 0.715, + "args": { + "External id": 983721,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939336654.236, "dur": 1.137, + "args": { + "External id": 983722,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939336675.892, "dur": 25744.851, + "args": { + "External id": 983723,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939336694.339, "dur": 25715.889, + "args": { + "External id": 983724,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939336711.713, "dur": 18.167, + "args": { + "External id": 983725,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939336734.051, "dur": 25626.634, + "args": { + "External id": 983726,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939336737.086, "dur": 25622.157, + "args": { + "External id": 983727,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939336742.685, "dur": 6.257, + "args": { + "External id": 983728,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939336750.736, "dur": 25604.214, + "args": { + "External id": 983729,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939362679.764, "dur": 43.512, + "args": { + "External id": 983730,"Sequence number": 10552474, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16630 + } + }, + { + "ph": "s", "id": 445, "pid": 2338706, "tid": 2338706, "ts": 6345939362679.764, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939362705.062, "dur": 11.676, + "args": { + "External id": 983731,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939362710.248, "dur": 6.240, + "args": { + "External id": 983732,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939362815.935, "dur": 92.543, + "args": { + "External id": 983733,"Record function id": 0, "Ev Idx": 16633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939362910.052, "dur": 1422.012, + "args": { + "External id": 983734,"Record function id": 0, "Ev Idx": 16634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939362956.113, "dur": 1358.502, + "args": { + "External id": 983735,"Sequence number": 10552475, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16635 + } + }, + { + "ph": "s", "id": 444, "pid": 2338706, "tid": 2338706, "ts": 6345939362956.113, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939363104.127, "dur": 65.734, + "args": { + "External id": 983736,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939363191.865, "dur": 118.263, + "args": { + "External id": 983737,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939363323.373, "dur": 43.314, + "args": { + "External id": 983738,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939363376.454, "dur": 34.471, + "args": { + "External id": 983739,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939363444.101, "dur": 30.331, + "args": { + "External id": 983740,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939363505.567, "dur": 23.621, + "args": { + "External id": 983741,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939363554.712, "dur": 164.657, + "args": { + "External id": 983742,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939363615.668, "dur": 21.813, + "args": { + "External id": 983743,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939363622.787, "dur": 12.576, + "args": { + "External id": 983744,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939363640.641, "dur": 4.989, + "args": { + "External id": 983745,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939363646.993, "dur": 1.534, + "args": { + "External id": 983746,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939363651.567, "dur": 4.844, + "args": { + "External id": 983747,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939363734.731, "dur": 59.642, + "args": { + "External id": 983748,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939363837.699, "dur": 35.708, + "args": { + "External id": 983749,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939363885.932, "dur": 51.934, + "args": { + "External id": 983750,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939363946.331, "dur": 43.819, + "args": { + "External id": 983751,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939364041.043, "dur": 77.829, + "args": { + "External id": 983752,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939364131.450, "dur": 50.997, + "args": { + "External id": 983753,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939364209.797, "dur": 23.127, + "args": { + "External id": 983754,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16654 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 2338706, "tid": 2338706, + "ts": 6345939364408.237, "dur": 94.661, + "args": { + "External id": 983755,"Record function id": 0, "Ev Idx": 16655 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939364596.781, "dur": 53.057, + "args": { + "External id": 983756,"Record function id": 0, "Ev Idx": 16656 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 2338706, "tid": 2338706, + "ts": 6345939364659.913, "dur": 26221.251, + "args": { + "External id": 983757,"Record function id": 0, "Ev Idx": 16657 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338706, "tid": 2338706, + "ts": 6345939364670.610, "dur": 1115.540, + "args": { + "External id": 983758,"Record function id": 0, "Ev Idx": 16658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939364768.970, "dur": 11.227, + "args": { + "External id": 983759,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939364797.068, "dur": 41.656, + "args": { + "External id": 983760,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939364804.824, "dur": 2.539, + "args": { + "External id": 983761,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939364810.089, "dur": 0.683, + "args": { + "External id": 983762,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939364813.124, "dur": 0.447, + "args": { + "External id": 983763,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939364815.405, "dur": 0.577, + "args": { + "External id": 983764,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939364818.142, "dur": 0.714, + "args": { + "External id": 983765,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939364820.919, "dur": 0.905, + "args": { + "External id": 983766,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939364824.124, "dur": 2.833, + "args": { + "External id": 983767,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939364828.749, "dur": 0.612, + "args": { + "External id": 983768,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939364831.420, "dur": 0.471, + "args": { + "External id": 983769,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939364852.028, "dur": 66.943, + "args": { + "External id": 983770,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939364959.168, "dur": 225.288, + "args": { + "External id": 983771,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939364973.784, "dur": 5.113, + "args": { + "External id": 983772,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939364985.506, "dur": 13.057, + "args": { + "External id": 983773,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939364990.925, "dur": 7.130, + "args": { + "External id": 983774,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939364995.786, "dur": 0.693, + "args": { + "External id": 983775,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939365029.631, "dur": 77.859, + "args": { + "External id": 983776,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939365035.806, "dur": 0.768, + "args": { + "External id": 983777,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939365038.814, "dur": 0.664, + "args": { + "External id": 983778,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939365041.720, "dur": 0.560, + "args": { + "External id": 983779,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939365044.306, "dur": 2.922, + "args": { + "External id": 983780,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939365049.801, "dur": 0.549, + "args": { + "External id": 983781,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939365091.010, "dur": 1.065, + "args": { + "External id": 983782,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939365096.841, "dur": 0.570, + "args": { + "External id": 983783,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939365099.238, "dur": 0.459, + "args": { + "External id": 983784,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939365101.651, "dur": 0.475, + "args": { + "External id": 983785,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939365124.120, "dur": 49.193, + "args": { + "External id": 983786,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939365254.940, "dur": 423.438, + "args": { + "External id": 983787,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939365283.770, "dur": 389.178, + "args": { + "External id": 983788,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16688, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939365296.427, "dur": 370.575, + "args": { + "External id": 983789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939365701.121, "dur": 2.926, + "args": { + "External id": 983790,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16690, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338706, "tid": 2338706, + "ts": 6345939365811.251, "dur": 24801.144, + "args": { + "External id": 983791,"Record function id": 0, "Ev Idx": 16691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939365949.558, "dur": 8.644, + "args": { + "External id": 983792,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939365962.217, "dur": 1.376, + "args": { + "External id": 983793,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939365965.762, "dur": 3.774, + "args": { + "External id": 983794,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939365971.376, "dur": 1.239, + "args": { + "External id": 983795,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939365974.267, "dur": 1.367, + "args": { + "External id": 983796,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939365979.551, "dur": 1.245, + "args": { + "External id": 983797,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939365983.242, "dur": 1.387, + "args": { + "External id": 983798,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939365986.113, "dur": 3.033, + "args": { + "External id": 983799,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939365991.131, "dur": 1.185, + "args": { + "External id": 983800,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939365993.943, "dur": 1.119, + "args": { + "External id": 983801,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939366038.779, "dur": 24516.529, + "args": { + "External id": 983802,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939366097.759, "dur": 24447.286, + "args": { + "External id": 983803,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939366117.284, "dur": 21.113, + "args": { + "External id": 983804,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939366144.401, "dur": 24358.263, + "args": { + "External id": 983805,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939366147.483, "dur": 24353.460, + "args": { + "External id": 983806,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939366152.544, "dur": 6.689, + "args": { + "External id": 983807,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939366161.557, "dur": 24335.987, + "args": { + "External id": 983808,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939390803.501, "dur": 46.082, + "args": { + "External id": 983809,"Sequence number": 10552476, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16709 + } + }, + { + "ph": "s", "id": 443, "pid": 2338706, "tid": 2338706, "ts": 6345939390803.501, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939390828.784, "dur": 11.833, + "args": { + "External id": 983810,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939390833.802, "dur": 6.502, + "args": { + "External id": 983811,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939390935.418, "dur": 104.069, + "args": { + "External id": 983812,"Record function id": 0, "Ev Idx": 16712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939391042.419, "dur": 1402.911, + "args": { + "External id": 983813,"Record function id": 0, "Ev Idx": 16713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939391132.546, "dur": 1294.312, + "args": { + "External id": 983814,"Sequence number": 10552477, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16714 + } + }, + { + "ph": "s", "id": 442, "pid": 2338706, "tid": 2338706, "ts": 6345939391132.546, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939391222.927, "dur": 59.040, + "args": { + "External id": 983815,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939391300.317, "dur": 121.052, + "args": { + "External id": 983816,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939391436.915, "dur": 44.889, + "args": { + "External id": 983817,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939391490.804, "dur": 37.051, + "args": { + "External id": 983818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939391556.173, "dur": 31.188, + "args": { + "External id": 983819,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939391611.337, "dur": 23.315, + "args": { + "External id": 983820,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939391660.773, "dur": 166.749, + "args": { + "External id": 983821,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939391719.881, "dur": 16.122, + "args": { + "External id": 983822,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939391726.915, "dur": 8.063, + "args": { + "External id": 983823,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939391740.720, "dur": 5.581, + "args": { + "External id": 983824,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939391748.668, "dur": 1.334, + "args": { + "External id": 983825,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939391753.600, "dur": 8.249, + "args": { + "External id": 983826,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939391844.375, "dur": 59.901, + "args": { + "External id": 983827,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939391940.404, "dur": 36.000, + "args": { + "External id": 983828,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939391989.701, "dur": 116.289, + "args": { + "External id": 983829,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939392122.307, "dur": 49.852, + "args": { + "External id": 983830,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939392199.687, "dur": 35.440, + "args": { + "External id": 983831,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939392243.842, "dur": 43.767, + "args": { + "External id": 983832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939392309.602, "dur": 22.039, + "args": { + "External id": 983833,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16733 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 2338706, "tid": 2338706, + "ts": 6345939392524.488, "dur": 99.026, + "args": { + "External id": 983834,"Record function id": 0, "Ev Idx": 16734 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939392710.927, "dur": 52.754, + "args": { + "External id": 983835,"Record function id": 0, "Ev Idx": 16735 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 2338706, "tid": 2338706, + "ts": 6345939392775.207, "dur": 29683.739, + "args": { + "External id": 983836,"Record function id": 0, "Ev Idx": 16736 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338706, "tid": 2338706, + "ts": 6345939392786.240, "dur": 1107.194, + "args": { + "External id": 983837,"Record function id": 0, "Ev Idx": 16737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939392883.940, "dur": 11.637, + "args": { + "External id": 983838,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939392912.708, "dur": 40.173, + "args": { + "External id": 983839,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939392919.545, "dur": 2.796, + "args": { + "External id": 983840,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939392925.361, "dur": 0.541, + "args": { + "External id": 983841,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939392927.776, "dur": 0.482, + "args": { + "External id": 983842,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939392930.329, "dur": 0.561, + "args": { + "External id": 983843,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939392933.035, "dur": 0.534, + "args": { + "External id": 983844,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939392935.982, "dur": 0.415, + "args": { + "External id": 983845,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939392937.938, "dur": 2.956, + "args": { + "External id": 983846,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939392943.088, "dur": 0.671, + "args": { + "External id": 983847,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939392945.338, "dur": 0.480, + "args": { + "External id": 983848,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939392966.858, "dur": 134.960, + "args": { + "External id": 983849,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939393151.156, "dur": 158.966, + "args": { + "External id": 983850,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939393166.745, "dur": 6.463, + "args": { + "External id": 983851,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939393180.062, "dur": 14.062, + "args": { + "External id": 983852,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939393185.348, "dur": 8.289, + "args": { + "External id": 983853,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939393190.423, "dur": 1.284, + "args": { + "External id": 983854,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939393202.908, "dur": 34.205, + "args": { + "External id": 983855,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939393206.093, "dur": 0.664, + "args": { + "External id": 983856,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939393209.787, "dur": 0.674, + "args": { + "External id": 983857,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939393212.301, "dur": 0.444, + "args": { + "External id": 983858,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939393214.601, "dur": 2.973, + "args": { + "External id": 983859,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939393219.482, "dur": 0.527, + "args": { + "External id": 983860,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939393222.916, "dur": 0.479, + "args": { + "External id": 983861,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939393225.197, "dur": 0.494, + "args": { + "External id": 983862,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939393228.144, "dur": 0.610, + "args": { + "External id": 983863,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939393230.462, "dur": 0.592, + "args": { + "External id": 983864,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939393254.035, "dur": 45.370, + "args": { + "External id": 983865,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939393380.506, "dur": 411.076, + "args": { + "External id": 983866,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939393407.867, "dur": 378.287, + "args": { + "External id": 983867,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16767, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939393420.349, "dur": 358.623, + "args": { + "External id": 983868,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939393813.488, "dur": 2.709, + "args": { + "External id": 983869,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16769, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338706, "tid": 2338706, + "ts": 6345939393916.450, "dur": 28286.718, + "args": { + "External id": 983870,"Record function id": 0, "Ev Idx": 16770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939394107.019, "dur": 9.343, + "args": { + "External id": 983871,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939394121.148, "dur": 1.524, + "args": { + "External id": 983872,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939394124.564, "dur": 4.078, + "args": { + "External id": 983873,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939394130.706, "dur": 1.288, + "args": { + "External id": 983874,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939394133.748, "dur": 1.186, + "args": { + "External id": 983875,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939394136.842, "dur": 1.206, + "args": { + "External id": 983876,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939394139.679, "dur": 1.551, + "args": { + "External id": 983877,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939394142.844, "dur": 3.705, + "args": { + "External id": 983878,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939394148.277, "dur": 1.176, + "args": { + "External id": 983879,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939394151.222, "dur": 0.960, + "args": { + "External id": 983880,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939394175.014, "dur": 27968.427, + "args": { + "External id": 983881,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939394193.991, "dur": 27938.916, + "args": { + "External id": 983882,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939394223.646, "dur": 20.531, + "args": { + "External id": 983883,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939394250.089, "dur": 27836.974, + "args": { + "External id": 983884,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939394253.202, "dur": 27832.125, + "args": { + "External id": 983885,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939394258.215, "dur": 6.063, + "args": { + "External id": 983886,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939394266.629, "dur": 27815.231, + "args": { + "External id": 983887,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939422391.994, "dur": 38.194, + "args": { + "External id": 983888,"Sequence number": 10552478, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16788 + } + }, + { + "ph": "s", "id": 441, "pid": 2338706, "tid": 2338706, "ts": 6345939422391.994, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939422411.835, "dur": 12.527, + "args": { + "External id": 983889,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939422417.660, "dur": 6.402, + "args": { + "External id": 983890,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939422504.913, "dur": 88.069, + "args": { + "External id": 983891,"Record function id": 0, "Ev Idx": 16791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939422594.490, "dur": 1315.205, + "args": { + "External id": 983892,"Record function id": 0, "Ev Idx": 16792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939422642.201, "dur": 1250.732, + "args": { + "External id": 983893,"Sequence number": 10552479, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16793 + } + }, + { + "ph": "s", "id": 440, "pid": 2338706, "tid": 2338706, "ts": 6345939422642.201, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939422719.748, "dur": 53.261, + "args": { + "External id": 983894,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939422789.816, "dur": 119.171, + "args": { + "External id": 983895,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939422926.075, "dur": 46.503, + "args": { + "External id": 983896,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939422982.483, "dur": 57.611, + "args": { + "External id": 983897,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939423115.972, "dur": 36.138, + "args": { + "External id": 983898,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939423177.364, "dur": 21.289, + "args": { + "External id": 983899,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939423225.388, "dur": 172.891, + "args": { + "External id": 983900,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939423284.277, "dur": 15.617, + "args": { + "External id": 983901,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939423291.584, "dur": 7.278, + "args": { + "External id": 983902,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939423315.580, "dur": 6.600, + "args": { + "External id": 983903,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939423324.192, "dur": 2.552, + "args": { + "External id": 983904,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939423330.157, "dur": 5.624, + "args": { + "External id": 983905,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939423415.088, "dur": 69.627, + "args": { + "External id": 983906,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939423520.661, "dur": 35.217, + "args": { + "External id": 983907,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939423568.363, "dur": 50.293, + "args": { + "External id": 983908,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939423627.198, "dur": 43.414, + "args": { + "External id": 983909,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939423693.702, "dur": 30.512, + "args": { + "External id": 983910,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939423732.414, "dur": 42.711, + "args": { + "External id": 983911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939423793.979, "dur": 21.087, + "args": { + "External id": 983912,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16812 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 2338706, "tid": 2338706, + "ts": 6345939423992.464, "dur": 162.421, + "args": { + "External id": 983913,"Record function id": 0, "Ev Idx": 16813 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939424252.628, "dur": 57.292, + "args": { + "External id": 983914,"Record function id": 0, "Ev Idx": 16814 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 2338706, "tid": 2338706, + "ts": 6345939424321.962, "dur": 29175.237, + "args": { + "External id": 983915,"Record function id": 0, "Ev Idx": 16815 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338706, "tid": 2338706, + "ts": 6345939424333.774, "dur": 1116.247, + "args": { + "External id": 983916,"Record function id": 0, "Ev Idx": 16816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939424435.415, "dur": 10.384, + "args": { + "External id": 983917,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939424462.276, "dur": 41.162, + "args": { + "External id": 983918,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424469.355, "dur": 2.612, + "args": { + "External id": 983919,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424474.902, "dur": 1.010, + "args": { + "External id": 983920,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424477.988, "dur": 0.458, + "args": { + "External id": 983921,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424480.235, "dur": 0.677, + "args": { + "External id": 983922,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424482.815, "dur": 0.596, + "args": { + "External id": 983923,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424485.338, "dur": 0.708, + "args": { + "External id": 983924,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424488.256, "dur": 2.840, + "args": { + "External id": 983925,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424492.934, "dur": 0.519, + "args": { + "External id": 983926,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424495.181, "dur": 0.453, + "args": { + "External id": 983927,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939424516.076, "dur": 66.652, + "args": { + "External id": 983928,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939424622.530, "dur": 144.464, + "args": { + "External id": 983929,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939424635.522, "dur": 4.533, + "args": { + "External id": 983930,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939424646.866, "dur": 13.109, + "args": { + "External id": 983931,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939424652.248, "dur": 7.257, + "args": { + "External id": 983932,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424657.186, "dur": 0.583, + "args": { + "External id": 983933,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939424668.229, "dur": 37.650, + "args": { + "External id": 983934,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424671.401, "dur": 0.620, + "args": { + "External id": 983935,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424681.981, "dur": 0.732, + "args": { + "External id": 983936,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424684.866, "dur": 0.514, + "args": { + "External id": 983937,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424687.035, "dur": 2.747, + "args": { + "External id": 983938,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424691.541, "dur": 0.480, + "args": { + "External id": 983939,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424693.556, "dur": 0.427, + "args": { + "External id": 983940,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424695.761, "dur": 0.413, + "args": { + "External id": 983941,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424697.878, "dur": 0.673, + "args": { + "External id": 983942,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939424700.352, "dur": 0.463, + "args": { + "External id": 983943,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939424718.221, "dur": 39.344, + "args": { + "External id": 983944,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939424832.042, "dur": 503.252, + "args": { + "External id": 983945,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939424857.927, "dur": 470.917, + "args": { + "External id": 983946,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16846, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939424869.178, "dur": 452.625, + "args": { + "External id": 983947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939425361.070, "dur": 3.148, + "args": { + "External id": 983948,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16848, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338706, "tid": 2338706, + "ts": 6345939425473.835, "dur": 27773.319, + "args": { + "External id": 983949,"Record function id": 0, "Ev Idx": 16849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939425596.317, "dur": 7.318, + "args": { + "External id": 983950,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939425607.785, "dur": 1.572, + "args": { + "External id": 983951,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939425611.805, "dur": 3.956, + "args": { + "External id": 983952,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939425617.401, "dur": 1.253, + "args": { + "External id": 983953,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939425620.320, "dur": 1.342, + "args": { + "External id": 983954,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939425623.444, "dur": 1.256, + "args": { + "External id": 983955,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939425626.890, "dur": 1.045, + "args": { + "External id": 983956,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939425629.485, "dur": 3.554, + "args": { + "External id": 983957,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939425634.522, "dur": 1.250, + "args": { + "External id": 983958,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939425637.339, "dur": 1.046, + "args": { + "External id": 983959,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939425658.093, "dur": 27532.991, + "args": { + "External id": 983960,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939425677.673, "dur": 27503.366, + "args": { + "External id": 983961,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939425695.549, "dur": 19.276, + "args": { + "External id": 983962,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939425719.139, "dur": 27417.604, + "args": { + "External id": 983963,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939425722.564, "dur": 27412.457, + "args": { + "External id": 983964,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939425727.563, "dur": 6.067, + "args": { + "External id": 983965,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939425735.730, "dur": 27395.880, + "args": { + "External id": 983966,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939453427.496, "dur": 38.962, + "args": { + "External id": 983967,"Sequence number": 10552480, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16867 + } + }, + { + "ph": "s", "id": 439, "pid": 2338706, "tid": 2338706, "ts": 6345939453427.496, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939453448.485, "dur": 11.983, + "args": { + "External id": 983968,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939453453.956, "dur": 6.220, + "args": { + "External id": 983969,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939453545.763, "dur": 87.274, + "args": { + "External id": 983970,"Record function id": 0, "Ev Idx": 16870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939453635.095, "dur": 1323.907, + "args": { + "External id": 983971,"Record function id": 0, "Ev Idx": 16871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939453682.590, "dur": 1258.151, + "args": { + "External id": 983972,"Sequence number": 10552481, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16872 + } + }, + { + "ph": "s", "id": 438, "pid": 2338706, "tid": 2338706, "ts": 6345939453682.590, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939453768.961, "dur": 57.753, + "args": { + "External id": 983973,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939453843.737, "dur": 117.303, + "args": { + "External id": 983974,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939453977.656, "dur": 68.163, + "args": { + "External id": 983975,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939454102.339, "dur": 49.165, + "args": { + "External id": 983976,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939454183.883, "dur": 36.024, + "args": { + "External id": 983977,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939454241.242, "dur": 24.102, + "args": { + "External id": 983978,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939454291.040, "dur": 163.010, + "args": { + "External id": 983979,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939454351.362, "dur": 16.016, + "args": { + "External id": 983980,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939454358.725, "dur": 7.600, + "args": { + "External id": 983981,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939454371.874, "dur": 5.046, + "args": { + "External id": 983982,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939454378.799, "dur": 1.417, + "args": { + "External id": 983983,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939454383.291, "dur": 8.073, + "args": { + "External id": 983984,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939454468.206, "dur": 57.493, + "args": { + "External id": 983985,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939454558.101, "dur": 33.504, + "args": { + "External id": 983986,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939454603.653, "dur": 50.113, + "args": { + "External id": 983987,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939454662.352, "dur": 41.315, + "args": { + "External id": 983988,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939454726.364, "dur": 31.116, + "args": { + "External id": 983989,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939454766.976, "dur": 41.913, + "args": { + "External id": 983990,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939454828.007, "dur": 22.560, + "args": { + "External id": 983991,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16891 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 2338706, "tid": 2338706, + "ts": 6345939455096.154, "dur": 101.659, + "args": { + "External id": 983992,"Record function id": 0, "Ev Idx": 16892 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939455289.671, "dur": 53.163, + "args": { + "External id": 983993,"Record function id": 0, "Ev Idx": 16893 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 2338706, "tid": 2338706, + "ts": 6345939455353.328, "dur": 31376.797, + "args": { + "External id": 983994,"Record function id": 0, "Ev Idx": 16894 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338706, "tid": 2338706, + "ts": 6345939455365.511, "dur": 1106.022, + "args": { + "External id": 983995,"Record function id": 0, "Ev Idx": 16895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939455461.197, "dur": 12.032, + "args": { + "External id": 983996,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939455489.528, "dur": 40.799, + "args": { + "External id": 983997,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455496.502, "dur": 2.566, + "args": { + "External id": 983998,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455502.116, "dur": 0.612, + "args": { + "External id": 983999,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455505.214, "dur": 0.803, + "args": { + "External id": 984000,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455507.833, "dur": 0.532, + "args": { + "External id": 984001,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455509.883, "dur": 0.904, + "args": { + "External id": 984002,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455512.449, "dur": 0.416, + "args": { + "External id": 984003,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455514.757, "dur": 2.884, + "args": { + "External id": 984004,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455519.416, "dur": 0.454, + "args": { + "External id": 984005,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455521.427, "dur": 0.389, + "args": { + "External id": 984006,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939455543.010, "dur": 65.985, + "args": { + "External id": 984007,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939455647.367, "dur": 150.864, + "args": { + "External id": 984008,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939455661.479, "dur": 4.777, + "args": { + "External id": 984009,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939455673.063, "dur": 12.324, + "args": { + "External id": 984010,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939455678.393, "dur": 6.467, + "args": { + "External id": 984011,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455682.560, "dur": 0.807, + "args": { + "External id": 984012,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939455693.400, "dur": 38.056, + "args": { + "External id": 984013,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455696.156, "dur": 0.924, + "args": { + "External id": 984014,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455698.877, "dur": 0.444, + "args": { + "External id": 984015,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455701.296, "dur": 0.580, + "args": { + "External id": 984016,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455703.734, "dur": 3.137, + "args": { + "External id": 984017,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455716.760, "dur": 0.497, + "args": { + "External id": 984018,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455719.302, "dur": 0.528, + "args": { + "External id": 984019,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455721.658, "dur": 0.423, + "args": { + "External id": 984020,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455723.719, "dur": 0.438, + "args": { + "External id": 984021,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939455725.796, "dur": 0.481, + "args": { + "External id": 984022,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939455746.622, "dur": 42.196, + "args": { + "External id": 984023,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939455860.716, "dur": 490.409, + "args": { + "External id": 984024,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939455886.762, "dur": 457.791, + "args": { + "External id": 984025,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16925, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939455898.746, "dur": 438.731, + "args": { + "External id": 984026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939456380.553, "dur": 3.166, + "args": { + "External id": 984027,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16927, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338706, "tid": 2338706, + "ts": 6345939456496.954, "dur": 29997.030, + "args": { + "External id": 984028,"Record function id": 0, "Ev Idx": 16928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939456624.198, "dur": 7.381, + "args": { + "External id": 984029,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939456635.575, "dur": 1.395, + "args": { + "External id": 984030,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939456639.023, "dur": 3.918, + "args": { + "External id": 984031,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939456645.105, "dur": 1.253, + "args": { + "External id": 984032,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939456648.047, "dur": 1.151, + "args": { + "External id": 984033,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939456650.848, "dur": 1.410, + "args": { + "External id": 984034,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939456653.982, "dur": 1.100, + "args": { + "External id": 984035,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939456657.812, "dur": 3.654, + "args": { + "External id": 984036,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939456662.924, "dur": 1.268, + "args": { + "External id": 984037,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939456665.942, "dur": 0.920, + "args": { + "External id": 984038,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939456688.647, "dur": 29751.076, + "args": { + "External id": 984039,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939456708.019, "dur": 29721.045, + "args": { + "External id": 984040,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939456728.203, "dur": 17.986, + "args": { + "External id": 984041,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939456750.079, "dur": 29634.007, + "args": { + "External id": 984042,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939456753.070, "dur": 29630.192, + "args": { + "External id": 984043,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939456758.111, "dur": 6.140, + "args": { + "External id": 984044,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939456766.273, "dur": 29612.267, + "args": { + "External id": 984045,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939486664.105, "dur": 36.885, + "args": { + "External id": 984046,"Sequence number": 10552482, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16946 + } + }, + { + "ph": "s", "id": 437, "pid": 2338706, "tid": 2338706, "ts": 6345939486664.105, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939486684.559, "dur": 11.050, + "args": { + "External id": 984047,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939486689.888, "dur": 5.413, + "args": { + "External id": 984048,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939486780.104, "dur": 84.847, + "args": { + "External id": 984049,"Record function id": 0, "Ev Idx": 16949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939486866.729, "dur": 1362.101, + "args": { + "External id": 984050,"Record function id": 0, "Ev Idx": 16950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939486913.168, "dur": 1298.394, + "args": { + "External id": 984051,"Sequence number": 10552483, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16951 + } + }, + { + "ph": "s", "id": 436, "pid": 2338706, "tid": 2338706, "ts": 6345939486913.168, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939486996.090, "dur": 111.675, + "args": { + "External id": 984052,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939487129.887, "dur": 118.850, + "args": { + "External id": 984053,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939487264.201, "dur": 42.102, + "args": { + "External id": 984054,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939487314.353, "dur": 33.407, + "args": { + "External id": 984055,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939487381.408, "dur": 31.577, + "args": { + "External id": 984056,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939487437.815, "dur": 19.875, + "args": { + "External id": 984057,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939487483.676, "dur": 153.907, + "args": { + "External id": 984058,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939487541.264, "dur": 13.329, + "args": { + "External id": 984059,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939487547.825, "dur": 5.797, + "args": { + "External id": 984060,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939487557.898, "dur": 4.885, + "args": { + "External id": 984061,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939487564.668, "dur": 1.315, + "args": { + "External id": 984062,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939487569.130, "dur": 10.015, + "args": { + "External id": 984063,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939487649.971, "dur": 52.160, + "args": { + "External id": 984064,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939487763.414, "dur": 46.961, + "args": { + "External id": 984065,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939487822.795, "dur": 49.431, + "args": { + "External id": 984066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939487880.869, "dur": 39.538, + "args": { + "External id": 984067,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939487944.816, "dur": 27.778, + "args": { + "External id": 984068,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939487979.041, "dur": 59.048, + "args": { + "External id": 984069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939488102.840, "dur": 27.015, + "args": { + "External id": 984070,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16970 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 2338706, "tid": 2338706, + "ts": 6345939488304.902, "dur": 92.826, + "args": { + "External id": 984071,"Record function id": 0, "Ev Idx": 16971 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939488484.305, "dur": 52.967, + "args": { + "External id": 984072,"Record function id": 0, "Ev Idx": 16972 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 2338706, "tid": 2338706, + "ts": 6345939488548.942, "dur": 32043.365, + "args": { + "External id": 984073,"Record function id": 0, "Ev Idx": 16973 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338706, "tid": 2338706, + "ts": 6345939488560.076, "dur": 1138.400, + "args": { + "External id": 984074,"Record function id": 0, "Ev Idx": 16974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939488654.444, "dur": 9.709, + "args": { + "External id": 984075,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939488679.892, "dur": 43.732, + "args": { + "External id": 984076,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488686.573, "dur": 2.559, + "args": { + "External id": 984077,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488692.135, "dur": 0.615, + "args": { + "External id": 984078,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488695.005, "dur": 0.896, + "args": { + "External id": 984079,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488698.357, "dur": 0.620, + "args": { + "External id": 984080,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488701.147, "dur": 0.737, + "args": { + "External id": 984081,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488703.837, "dur": 0.847, + "args": { + "External id": 984082,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488707.196, "dur": 2.939, + "args": { + "External id": 984083,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488712.426, "dur": 0.586, + "args": { + "External id": 984084,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488714.840, "dur": 0.624, + "args": { + "External id": 984085,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939488741.791, "dur": 60.575, + "args": { + "External id": 984086,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939488840.377, "dur": 143.308, + "args": { + "External id": 984087,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939488853.881, "dur": 3.715, + "args": { + "External id": 984088,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939488863.720, "dur": 13.171, + "args": { + "External id": 984089,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939488869.091, "dur": 7.310, + "args": { + "External id": 984090,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488873.960, "dur": 0.939, + "args": { + "External id": 984091,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939488885.068, "dur": 37.283, + "args": { + "External id": 984092,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488888.320, "dur": 0.510, + "args": { + "External id": 984093,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488891.387, "dur": 0.611, + "args": { + "External id": 984094,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488894.545, "dur": 1.005, + "args": { + "External id": 984095,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488897.555, "dur": 3.516, + "args": { + "External id": 984096,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488903.449, "dur": 0.762, + "args": { + "External id": 984097,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488906.445, "dur": 0.746, + "args": { + "External id": 984098,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488909.245, "dur": 0.592, + "args": { + "External id": 984099,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488911.655, "dur": 0.625, + "args": { + "External id": 984100,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939488914.370, "dur": 0.579, + "args": { + "External id": 984101,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939488934.827, "dur": 38.804, + "args": { + "External id": 984102,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939489107.211, "dur": 475.417, + "args": { + "External id": 984103,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939489139.315, "dur": 437.186, + "args": { + "External id": 984104,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17004, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939489153.174, "dur": 417.352, + "args": { + "External id": 984105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939489607.200, "dur": 2.991, + "args": { + "External id": 984106,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17006, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338706, "tid": 2338706, + "ts": 6345939489723.331, "dur": 30613.220, + "args": { + "External id": 984107,"Record function id": 0, "Ev Idx": 17007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939489842.327, "dur": 7.701, + "args": { + "External id": 984108,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939489853.802, "dur": 1.062, + "args": { + "External id": 984109,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939489856.788, "dur": 3.904, + "args": { + "External id": 984110,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939489862.631, "dur": 1.266, + "args": { + "External id": 984111,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939489865.623, "dur": 1.320, + "args": { + "External id": 984112,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939489868.416, "dur": 1.464, + "args": { + "External id": 984113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939489872.179, "dur": 1.150, + "args": { + "External id": 984114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939489875.380, "dur": 3.901, + "args": { + "External id": 984115,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939489881.410, "dur": 0.700, + "args": { + "External id": 984116,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939489884.307, "dur": 0.964, + "args": { + "External id": 984117,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939489907.994, "dur": 30374.899, + "args": { + "External id": 984118,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939489926.989, "dur": 30345.051, + "args": { + "External id": 984119,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939489943.322, "dur": 19.896, + "args": { + "External id": 984120,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939489967.397, "dur": 30263.771, + "args": { + "External id": 984121,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939489970.474, "dur": 30259.066, + "args": { + "External id": 984122,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939489975.641, "dur": 8.953, + "args": { + "External id": 984123,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939489986.595, "dur": 30238.954, + "args": { + "External id": 984124,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939520515.554, "dur": 44.493, + "args": { + "External id": 984125,"Sequence number": 10552484, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17025 + } + }, + { + "ph": "s", "id": 435, "pid": 2338706, "tid": 2338706, "ts": 6345939520515.554, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939520541.875, "dur": 11.587, + "args": { + "External id": 984126,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939520547.014, "dur": 6.168, + "args": { + "External id": 984127,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939520647.680, "dur": 88.665, + "args": { + "External id": 984128,"Record function id": 0, "Ev Idx": 17028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939520738.071, "dur": 1383.815, + "args": { + "External id": 984129,"Record function id": 0, "Ev Idx": 17029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939520785.696, "dur": 1316.867, + "args": { + "External id": 984130,"Sequence number": 10552485, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17030 + } + }, + { + "ph": "s", "id": 434, "pid": 2338706, "tid": 2338706, "ts": 6345939520785.696, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939520872.877, "dur": 59.907, + "args": { + "External id": 984131,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939520950.889, "dur": 183.054, + "args": { + "External id": 984132,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939521157.090, "dur": 52.140, + "args": { + "External id": 984133,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939521218.915, "dur": 35.340, + "args": { + "External id": 984134,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939521285.948, "dur": 33.668, + "args": { + "External id": 984135,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939521343.193, "dur": 21.946, + "args": { + "External id": 984136,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939521390.419, "dur": 160.885, + "args": { + "External id": 984137,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939521451.613, "dur": 15.351, + "args": { + "External id": 984138,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939521459.064, "dur": 6.890, + "args": { + "External id": 984139,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939521471.661, "dur": 4.429, + "args": { + "External id": 984140,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939521477.894, "dur": 1.313, + "args": { + "External id": 984141,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939521482.201, "dur": 6.065, + "args": { + "External id": 984142,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939521566.177, "dur": 56.814, + "args": { + "External id": 984143,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939521659.993, "dur": 35.194, + "args": { + "External id": 984144,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939521707.037, "dur": 51.706, + "args": { + "External id": 984145,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939521767.146, "dur": 40.639, + "args": { + "External id": 984146,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939521834.806, "dur": 32.922, + "args": { + "External id": 984147,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939521874.372, "dur": 43.592, + "args": { + "External id": 984148,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939521941.224, "dur": 24.215, + "args": { + "External id": 984149,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17049 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 2338706, "tid": 2338706, + "ts": 6345939522199.646, "dur": 100.747, + "args": { + "External id": 984150,"Record function id": 0, "Ev Idx": 17050 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939522390.946, "dur": 55.375, + "args": { + "External id": 984151,"Record function id": 0, "Ev Idx": 17051 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 2338706, "tid": 2338706, + "ts": 6345939522457.506, "dur": 30414.964, + "args": { + "External id": 984152,"Record function id": 0, "Ev Idx": 17052 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338706, "tid": 2338706, + "ts": 6345939522470.226, "dur": 1105.061, + "args": { + "External id": 984153,"Record function id": 0, "Ev Idx": 17053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939522566.097, "dur": 12.161, + "args": { + "External id": 984154,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939522594.755, "dur": 45.220, + "args": { + "External id": 984155,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522601.394, "dur": 2.428, + "args": { + "External id": 984156,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522607.399, "dur": 0.952, + "args": { + "External id": 984157,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522610.413, "dur": 0.761, + "args": { + "External id": 984158,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522613.434, "dur": 0.555, + "args": { + "External id": 984159,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522616.020, "dur": 0.672, + "args": { + "External id": 984160,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522618.755, "dur": 0.704, + "args": { + "External id": 984161,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522621.634, "dur": 2.959, + "args": { + "External id": 984162,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522627.055, "dur": 0.529, + "args": { + "External id": 984163,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522629.554, "dur": 0.530, + "args": { + "External id": 984164,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939522654.273, "dur": 70.188, + "args": { + "External id": 984165,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939522778.245, "dur": 142.337, + "args": { + "External id": 984166,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939522792.205, "dur": 4.337, + "args": { + "External id": 984167,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939522802.826, "dur": 12.714, + "args": { + "External id": 984168,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939522808.174, "dur": 6.844, + "args": { + "External id": 984169,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522812.780, "dur": 0.874, + "args": { + "External id": 984170,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939522824.188, "dur": 33.507, + "args": { + "External id": 984171,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522826.933, "dur": 0.724, + "args": { + "External id": 984172,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522829.687, "dur": 0.422, + "args": { + "External id": 984173,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522832.129, "dur": 0.515, + "args": { + "External id": 984174,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522835.180, "dur": 2.649, + "args": { + "External id": 984175,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522839.881, "dur": 0.554, + "args": { + "External id": 984176,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522842.751, "dur": 0.513, + "args": { + "External id": 984177,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522845.054, "dur": 0.816, + "args": { + "External id": 984178,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522848.009, "dur": 0.446, + "args": { + "External id": 984179,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939522850.708, "dur": 0.440, + "args": { + "External id": 984180,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939522872.674, "dur": 38.354, + "args": { + "External id": 984181,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939522981.909, "dur": 482.689, + "args": { + "External id": 984182,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939523033.390, "dur": 424.317, + "args": { + "External id": 984183,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17083, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939523047.540, "dur": 403.669, + "args": { + "External id": 984184,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939523489.883, "dur": 2.452, + "args": { + "External id": 984185,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17085, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338706, "tid": 2338706, + "ts": 6345939523598.239, "dur": 29022.073, + "args": { + "External id": 984186,"Record function id": 0, "Ev Idx": 17086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939523719.161, "dur": 7.303, + "args": { + "External id": 984187,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939523730.684, "dur": 1.618, + "args": { + "External id": 984188,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939523734.471, "dur": 3.715, + "args": { + "External id": 984189,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939523740.164, "dur": 1.878, + "args": { + "External id": 984190,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939523743.728, "dur": 1.781, + "args": { + "External id": 984191,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939523747.111, "dur": 1.348, + "args": { + "External id": 984192,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939523750.091, "dur": 1.506, + "args": { + "External id": 984193,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939523753.851, "dur": 3.359, + "args": { + "External id": 984194,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939523758.851, "dur": 1.071, + "args": { + "External id": 984195,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939523761.877, "dur": 1.494, + "args": { + "External id": 984196,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939523783.301, "dur": 28780.468, + "args": { + "External id": 984197,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939523802.462, "dur": 28750.143, + "args": { + "External id": 984198,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939523821.500, "dur": 18.704, + "args": { + "External id": 984199,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939523844.162, "dur": 28661.376, + "args": { + "External id": 984200,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939523847.378, "dur": 28656.407, + "args": { + "External id": 984201,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939523852.241, "dur": 8.883, + "args": { + "External id": 984202,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939523863.246, "dur": 28636.732, + "args": { + "External id": 984203,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939552799.703, "dur": 40.046, + "args": { + "External id": 984204,"Sequence number": 10552486, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17104 + } + }, + { + "ph": "s", "id": 433, "pid": 2338706, "tid": 2338706, "ts": 6345939552799.703, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939552820.891, "dur": 12.152, + "args": { + "External id": 984205,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939552826.393, "dur": 6.380, + "args": { + "External id": 984206,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939552922.794, "dur": 104.319, + "args": { + "External id": 984207,"Record function id": 0, "Ev Idx": 17107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939553030.175, "dur": 1382.769, + "args": { + "External id": 984208,"Record function id": 0, "Ev Idx": 17108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939553114.847, "dur": 1281.705, + "args": { + "External id": 984209,"Sequence number": 10552487, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17109 + } + }, + { + "ph": "s", "id": 432, "pid": 2338706, "tid": 2338706, "ts": 6345939553114.847, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939553202.114, "dur": 64.150, + "args": { + "External id": 984210,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939553284.625, "dur": 120.327, + "args": { + "External id": 984211,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939553421.672, "dur": 45.231, + "args": { + "External id": 984212,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939553474.738, "dur": 34.333, + "args": { + "External id": 984213,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939553536.821, "dur": 32.059, + "args": { + "External id": 984214,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939553592.540, "dur": 21.655, + "args": { + "External id": 984215,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939553639.501, "dur": 162.845, + "args": { + "External id": 984216,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939553700.473, "dur": 15.804, + "args": { + "External id": 984217,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939553708.169, "dur": 6.988, + "args": { + "External id": 984218,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939553720.798, "dur": 4.773, + "args": { + "External id": 984219,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939553727.598, "dur": 1.106, + "args": { + "External id": 984220,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939553731.934, "dur": 8.364, + "args": { + "External id": 984221,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939553816.058, "dur": 60.736, + "args": { + "External id": 984222,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939553912.397, "dur": 37.924, + "args": { + "External id": 984223,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939553963.297, "dur": 75.498, + "args": { + "External id": 984224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939554088.429, "dur": 57.301, + "args": { + "External id": 984225,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939554181.380, "dur": 36.957, + "args": { + "External id": 984226,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939554228.179, "dur": 46.596, + "args": { + "External id": 984227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939554294.736, "dur": 23.989, + "args": { + "External id": 984228,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17128 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 2338706, "tid": 2338706, + "ts": 6345939554490.676, "dur": 94.806, + "args": { + "External id": 984229,"Record function id": 0, "Ev Idx": 17129 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939554673.338, "dur": 54.854, + "args": { + "External id": 984230,"Record function id": 0, "Ev Idx": 17130 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 2338706, "tid": 2338706, + "ts": 6345939554739.577, "dur": 31782.639, + "args": { + "External id": 984231,"Record function id": 0, "Ev Idx": 17131 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338706, "tid": 2338706, + "ts": 6345939554752.349, "dur": 1075.673, + "args": { + "External id": 984232,"Record function id": 0, "Ev Idx": 17132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939554852.581, "dur": 9.543, + "args": { + "External id": 984233,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939554877.883, "dur": 41.639, + "args": { + "External id": 984234,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939554884.929, "dur": 2.519, + "args": { + "External id": 984235,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939554890.605, "dur": 0.593, + "args": { + "External id": 984236,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939554893.326, "dur": 0.612, + "args": { + "External id": 984237,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939554895.249, "dur": 0.588, + "args": { + "External id": 984238,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939554897.987, "dur": 0.585, + "args": { + "External id": 984239,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939554900.068, "dur": 0.382, + "args": { + "External id": 984240,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939554902.585, "dur": 2.901, + "args": { + "External id": 984241,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939554906.696, "dur": 0.513, + "args": { + "External id": 984242,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939554909.366, "dur": 0.798, + "args": { + "External id": 984243,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939554933.022, "dur": 55.879, + "args": { + "External id": 984244,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939555051.147, "dur": 188.982, + "args": { + "External id": 984245,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939555105.895, "dur": 6.873, + "args": { + "External id": 984246,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939555119.445, "dur": 14.843, + "args": { + "External id": 984247,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939555125.027, "dur": 8.750, + "args": { + "External id": 984248,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939555130.026, "dur": 1.596, + "args": { + "External id": 984249,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939555142.757, "dur": 31.764, + "args": { + "External id": 984250,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939555146.001, "dur": 0.695, + "args": { + "External id": 984251,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939555148.447, "dur": 0.731, + "args": { + "External id": 984252,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939555150.942, "dur": 0.538, + "args": { + "External id": 984253,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939555154.066, "dur": 3.270, + "args": { + "External id": 984254,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939555159.127, "dur": 0.440, + "args": { + "External id": 984255,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939555161.320, "dur": 0.492, + "args": { + "External id": 984256,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939555163.475, "dur": 0.511, + "args": { + "External id": 984257,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939555166.121, "dur": 0.404, + "args": { + "External id": 984258,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939555168.027, "dur": 0.440, + "args": { + "External id": 984259,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939555189.257, "dur": 40.086, + "args": { + "External id": 984260,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939555307.193, "dur": 417.162, + "args": { + "External id": 984261,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939555337.125, "dur": 381.635, + "args": { + "External id": 984262,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17162, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939555348.780, "dur": 363.730, + "args": { + "External id": 984263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939555747.548, "dur": 2.538, + "args": { + "External id": 984264,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17164, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338706, "tid": 2338706, + "ts": 6345939555852.870, "dur": 30411.571, + "args": { + "External id": 984265,"Record function id": 0, "Ev Idx": 17165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939555970.253, "dur": 6.461, + "args": { + "External id": 984266,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939555980.482, "dur": 1.135, + "args": { + "External id": 984267,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939555983.921, "dur": 3.494, + "args": { + "External id": 984268,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939555989.304, "dur": 1.094, + "args": { + "External id": 984269,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939555991.788, "dur": 1.105, + "args": { + "External id": 984270,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939555994.313, "dur": 1.214, + "args": { + "External id": 984271,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939555997.392, "dur": 1.084, + "args": { + "External id": 984272,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939556000.462, "dur": 2.576, + "args": { + "External id": 984273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939556004.866, "dur": 1.080, + "args": { + "External id": 984274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939556028.155, "dur": 1.748, + "args": { + "External id": 984275,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939556092.098, "dur": 30117.345, + "args": { + "External id": 984276,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939556115.247, "dur": 30084.314, + "args": { + "External id": 984277,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939556131.793, "dur": 19.656, + "args": { + "External id": 984278,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939556155.612, "dur": 30000.590, + "args": { + "External id": 984279,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939556158.763, "dur": 29995.745, + "args": { + "External id": 984280,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939556163.268, "dur": 6.572, + "args": { + "External id": 984281,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939556171.698, "dur": 29979.259, + "args": { + "External id": 984282,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939586449.735, "dur": 40.634, + "args": { + "External id": 984283,"Sequence number": 10552488, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17183 + } + }, + { + "ph": "s", "id": 431, "pid": 2338706, "tid": 2338706, "ts": 6345939586449.735, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939586472.232, "dur": 11.711, + "args": { + "External id": 984284,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939586477.492, "dur": 6.211, + "args": { + "External id": 984285,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939586573.483, "dur": 86.369, + "args": { + "External id": 984286,"Record function id": 0, "Ev Idx": 17186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939586661.399, "dur": 1339.691, + "args": { + "External id": 984287,"Record function id": 0, "Ev Idx": 17187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939586706.361, "dur": 1277.875, + "args": { + "External id": 984288,"Sequence number": 10552489, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17188 + } + }, + { + "ph": "s", "id": 430, "pid": 2338706, "tid": 2338706, "ts": 6345939586706.361, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939586795.889, "dur": 58.348, + "args": { + "External id": 984289,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939586871.930, "dur": 120.080, + "args": { + "External id": 984290,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939587033.243, "dur": 97.944, + "args": { + "External id": 984291,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939587147.523, "dur": 38.534, + "args": { + "External id": 984292,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939587220.138, "dur": 33.745, + "args": { + "External id": 984293,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939587279.480, "dur": 21.391, + "args": { + "External id": 984294,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939587327.379, "dur": 161.648, + "args": { + "External id": 984295,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939587387.624, "dur": 15.585, + "args": { + "External id": 984296,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939587394.745, "dur": 7.533, + "args": { + "External id": 984297,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939587407.345, "dur": 5.569, + "args": { + "External id": 984298,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939587414.594, "dur": 1.191, + "args": { + "External id": 984299,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939587418.981, "dur": 6.104, + "args": { + "External id": 984300,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939587503.450, "dur": 58.456, + "args": { + "External id": 984301,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939587600.221, "dur": 35.627, + "args": { + "External id": 984302,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939587647.970, "dur": 52.997, + "args": { + "External id": 984303,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939587709.694, "dur": 43.079, + "args": { + "External id": 984304,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939587778.597, "dur": 31.930, + "args": { + "External id": 984305,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939587818.542, "dur": 43.272, + "args": { + "External id": 984306,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939587881.307, "dur": 21.719, + "args": { + "External id": 984307,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17207 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 2338706, "tid": 2338706, + "ts": 6345939588144.120, "dur": 96.633, + "args": { + "External id": 984308,"Record function id": 0, "Ev Idx": 17208 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939588334.290, "dur": 56.935, + "args": { + "External id": 984309,"Record function id": 0, "Ev Idx": 17209 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 2338706, "tid": 2338706, + "ts": 6345939588403.826, "dur": 31548.974, + "args": { + "External id": 984310,"Record function id": 0, "Ev Idx": 17210 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338706, "tid": 2338706, + "ts": 6345939588415.050, "dur": 1137.472, + "args": { + "External id": 984311,"Record function id": 0, "Ev Idx": 17211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939588515.315, "dur": 12.056, + "args": { + "External id": 984312,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939588544.824, "dur": 39.758, + "args": { + "External id": 984313,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588551.726, "dur": 2.823, + "args": { + "External id": 984314,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588558.021, "dur": 0.572, + "args": { + "External id": 984315,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588560.212, "dur": 0.743, + "args": { + "External id": 984316,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588563.010, "dur": 0.586, + "args": { + "External id": 984317,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588565.036, "dur": 0.443, + "args": { + "External id": 984318,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588567.185, "dur": 0.670, + "args": { + "External id": 984319,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588569.369, "dur": 3.486, + "args": { + "External id": 984320,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588574.831, "dur": 0.535, + "args": { + "External id": 984321,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588576.762, "dur": 0.503, + "args": { + "External id": 984322,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939588598.714, "dur": 68.566, + "args": { + "External id": 984323,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939588710.498, "dur": 139.621, + "args": { + "External id": 984324,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939588724.780, "dur": 3.937, + "args": { + "External id": 984325,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939588734.862, "dur": 12.301, + "args": { + "External id": 984326,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939588740.372, "dur": 6.321, + "args": { + "External id": 984327,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588744.476, "dur": 0.853, + "args": { + "External id": 984328,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939588754.822, "dur": 29.951, + "args": { + "External id": 984329,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588757.596, "dur": 0.878, + "args": { + "External id": 984330,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588760.639, "dur": 0.590, + "args": { + "External id": 984331,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588762.846, "dur": 0.542, + "args": { + "External id": 984332,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588765.980, "dur": 3.415, + "args": { + "External id": 984333,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588770.869, "dur": 0.494, + "args": { + "External id": 984334,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588773.318, "dur": 0.458, + "args": { + "External id": 984335,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588775.623, "dur": 0.365, + "args": { + "External id": 984336,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588777.685, "dur": 0.454, + "args": { + "External id": 984337,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939588779.465, "dur": 0.419, + "args": { + "External id": 984338,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939588799.111, "dur": 41.509, + "args": { + "External id": 984339,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939588913.703, "dur": 514.012, + "args": { + "External id": 984340,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939588942.069, "dur": 479.079, + "args": { + "External id": 984341,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17241, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939588953.756, "dur": 459.921, + "args": { + "External id": 984342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939589459.877, "dur": 2.997, + "args": { + "External id": 984343,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17243, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338706, "tid": 2338706, + "ts": 6345939589576.002, "dur": 30118.075, + "args": { + "External id": 984344,"Record function id": 0, "Ev Idx": 17244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939589696.262, "dur": 7.749, + "args": { + "External id": 984345,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939589707.948, "dur": 1.135, + "args": { + "External id": 984346,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939589711.240, "dur": 3.397, + "args": { + "External id": 984347,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939589716.481, "dur": 1.024, + "args": { + "External id": 984348,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939589719.392, "dur": 1.206, + "args": { + "External id": 984349,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939589722.142, "dur": 1.398, + "args": { + "External id": 984350,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939589725.447, "dur": 1.069, + "args": { + "External id": 984351,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939589728.736, "dur": 2.380, + "args": { + "External id": 984352,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939589732.819, "dur": 0.884, + "args": { + "External id": 984353,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939589735.655, "dur": 0.750, + "args": { + "External id": 984354,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939589756.539, "dur": 29883.191, + "args": { + "External id": 984355,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939589775.371, "dur": 29853.876, + "args": { + "External id": 984356,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939589793.772, "dur": 19.306, + "args": { + "External id": 984357,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939589817.334, "dur": 29768.673, + "args": { + "External id": 984358,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939589820.457, "dur": 29763.721, + "args": { + "External id": 984359,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939589824.869, "dur": 7.251, + "args": { + "External id": 984360,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939589834.071, "dur": 29746.581, + "args": { + "External id": 984361,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939619877.804, "dur": 42.726, + "args": { + "External id": 984362,"Sequence number": 10552490, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17262 + } + }, + { + "ph": "s", "id": 429, "pid": 2338706, "tid": 2338706, "ts": 6345939619877.804, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939619901.564, "dur": 12.313, + "args": { + "External id": 984363,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939619906.858, "dur": 6.767, + "args": { + "External id": 984364,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939620023.719, "dur": 119.064, + "args": { + "External id": 984365,"Record function id": 0, "Ev Idx": 17265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939620146.202, "dur": 1294.858, + "args": { + "External id": 984366,"Record function id": 0, "Ev Idx": 17266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939620194.332, "dur": 1230.400, + "args": { + "External id": 984367,"Sequence number": 10552491, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17267 + } + }, + { + "ph": "s", "id": 428, "pid": 2338706, "tid": 2338706, "ts": 6345939620194.332, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939620276.211, "dur": 60.694, + "args": { + "External id": 984368,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939620353.753, "dur": 117.908, + "args": { + "External id": 984369,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939620489.751, "dur": 44.028, + "args": { + "External id": 984370,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939620541.738, "dur": 36.561, + "args": { + "External id": 984371,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939620608.157, "dur": 32.582, + "args": { + "External id": 984372,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939620664.238, "dur": 21.785, + "args": { + "External id": 984373,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939620709.892, "dur": 155.851, + "args": { + "External id": 984374,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939620772.861, "dur": 13.283, + "args": { + "External id": 984375,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939620779.463, "dur": 5.824, + "args": { + "External id": 984376,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939620789.272, "dur": 5.234, + "args": { + "External id": 984377,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939620796.169, "dur": 1.526, + "args": { + "External id": 984378,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939620801.074, "dur": 7.444, + "args": { + "External id": 984379,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939620877.769, "dur": 51.572, + "args": { + "External id": 984380,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939620965.388, "dur": 34.613, + "args": { + "External id": 984381,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939621032.203, "dur": 93.488, + "args": { + "External id": 984382,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939621140.043, "dur": 44.952, + "args": { + "External id": 984383,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939621211.536, "dur": 34.064, + "args": { + "External id": 984384,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939621254.123, "dur": 47.214, + "args": { + "External id": 984385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939621321.333, "dur": 27.013, + "args": { + "External id": 984386,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17286 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 2338706, "tid": 2338706, + "ts": 6345939621517.564, "dur": 91.813, + "args": { + "External id": 984387,"Record function id": 0, "Ev Idx": 17287 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939621697.489, "dur": 53.568, + "args": { + "External id": 984388,"Record function id": 0, "Ev Idx": 17288 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 2338706, "tid": 2338706, + "ts": 6345939621762.289, "dur": 30818.462, + "args": { + "External id": 984389,"Record function id": 0, "Ev Idx": 17289 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338706, "tid": 2338706, + "ts": 6345939621771.834, "dur": 1063.618, + "args": { + "External id": 984390,"Record function id": 0, "Ev Idx": 17290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939621866.231, "dur": 10.713, + "args": { + "External id": 984391,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939621893.699, "dur": 44.835, + "args": { + "External id": 984392,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939621900.646, "dur": 2.559, + "args": { + "External id": 984393,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939621906.264, "dur": 0.877, + "args": { + "External id": 984394,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939621909.436, "dur": 0.494, + "args": { + "External id": 984395,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939621911.568, "dur": 0.445, + "args": { + "External id": 984396,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939621918.255, "dur": 0.608, + "args": { + "External id": 984397,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939621920.679, "dur": 0.562, + "args": { + "External id": 984398,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939621923.295, "dur": 3.122, + "args": { + "External id": 984399,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939621927.899, "dur": 0.484, + "args": { + "External id": 984400,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939621930.648, "dur": 0.675, + "args": { + "External id": 984401,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939621951.984, "dur": 77.727, + "args": { + "External id": 984402,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939622116.112, "dur": 149.579, + "args": { + "External id": 984403,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939622132.539, "dur": 6.508, + "args": { + "External id": 984404,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939622145.420, "dur": 14.270, + "args": { + "External id": 984405,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939622150.798, "dur": 8.379, + "args": { + "External id": 984406,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939622155.988, "dur": 1.023, + "args": { + "External id": 984407,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939622167.683, "dur": 30.518, + "args": { + "External id": 984408,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939622170.728, "dur": 0.830, + "args": { + "External id": 984409,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939622173.371, "dur": 0.519, + "args": { + "External id": 984410,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939622175.983, "dur": 0.676, + "args": { + "External id": 984411,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939622177.947, "dur": 3.065, + "args": { + "External id": 984412,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939622183.011, "dur": 0.614, + "args": { + "External id": 984413,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939622185.253, "dur": 0.439, + "args": { + "External id": 984414,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939622188.478, "dur": 0.390, + "args": { + "External id": 984415,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939622190.309, "dur": 0.495, + "args": { + "External id": 984416,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939622192.842, "dur": 0.569, + "args": { + "External id": 984417,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939622211.960, "dur": 43.710, + "args": { + "External id": 984418,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939622329.422, "dur": 401.791, + "args": { + "External id": 984419,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939622359.061, "dur": 366.312, + "args": { + "External id": 984420,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17320, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939622371.099, "dur": 348.340, + "args": { + "External id": 984421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939622756.262, "dur": 2.648, + "args": { + "External id": 984422,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17322, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338706, "tid": 2338706, + "ts": 6345939622859.422, "dur": 29475.076, + "args": { + "External id": 984423,"Record function id": 0, "Ev Idx": 17323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939622975.947, "dur": 6.397, + "args": { + "External id": 984424,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939622986.017, "dur": 1.210, + "args": { + "External id": 984425,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939622989.114, "dur": 3.202, + "args": { + "External id": 984426,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939622994.396, "dur": 0.981, + "args": { + "External id": 984427,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939622997.033, "dur": 0.946, + "args": { + "External id": 984428,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939622999.504, "dur": 1.115, + "args": { + "External id": 984429,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939623002.402, "dur": 0.948, + "args": { + "External id": 984430,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939623005.466, "dur": 27.493, + "args": { + "External id": 984431,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939623038.770, "dur": 1.372, + "args": { + "External id": 984432,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939623042.385, "dur": 0.749, + "args": { + "External id": 984433,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939623108.512, "dur": 29170.752, + "args": { + "External id": 984434,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939623130.274, "dur": 29138.302, + "args": { + "External id": 984435,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939623148.893, "dur": 20.807, + "args": { + "External id": 984436,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939623175.276, "dur": 29048.402, + "args": { + "External id": 984437,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939623178.624, "dur": 29043.366, + "args": { + "External id": 984438,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939623183.424, "dur": 7.450, + "args": { + "External id": 984439,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939623192.792, "dur": 29025.725, + "args": { + "External id": 984440,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939652506.665, "dur": 45.656, + "args": { + "External id": 984441,"Sequence number": 10552492, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17341 + } + }, + { + "ph": "s", "id": 427, "pid": 2338706, "tid": 2338706, "ts": 6345939652506.665, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939652534.418, "dur": 12.227, + "args": { + "External id": 984442,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939652540.125, "dur": 6.243, + "args": { + "External id": 984443,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939652627.898, "dur": 85.357, + "args": { + "External id": 984444,"Record function id": 0, "Ev Idx": 17344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939652714.898, "dur": 1394.318, + "args": { + "External id": 984445,"Record function id": 0, "Ev Idx": 17345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939652757.778, "dur": 1292.971, + "args": { + "External id": 984446,"Sequence number": 10552493, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17346 + } + }, + { + "ph": "s", "id": 426, "pid": 2338706, "tid": 2338706, "ts": 6345939652757.778, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939652840.936, "dur": 54.851, + "args": { + "External id": 984447,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939652912.608, "dur": 132.904, + "args": { + "External id": 984448,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939653112.803, "dur": 56.166, + "args": { + "External id": 984449,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939653183.411, "dur": 34.615, + "args": { + "External id": 984450,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939653259.932, "dur": 34.037, + "args": { + "External id": 984451,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939653327.003, "dur": 23.315, + "args": { + "External id": 984452,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939653376.623, "dur": 159.177, + "args": { + "External id": 984453,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939653439.723, "dur": 12.979, + "args": { + "External id": 984454,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939653445.913, "dur": 5.657, + "args": { + "External id": 984455,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939653456.311, "dur": 5.076, + "args": { + "External id": 984456,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939653462.969, "dur": 1.147, + "args": { + "External id": 984457,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939653467.329, "dur": 6.031, + "args": { + "External id": 984458,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939653550.287, "dur": 58.090, + "args": { + "External id": 984459,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939653644.837, "dur": 38.623, + "args": { + "External id": 984460,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939653695.316, "dur": 51.367, + "args": { + "External id": 984461,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939653754.859, "dur": 42.771, + "args": { + "External id": 984462,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939653820.755, "dur": 34.113, + "args": { + "External id": 984463,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939653862.884, "dur": 44.759, + "args": { + "External id": 984464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939653927.887, "dur": 24.141, + "args": { + "External id": 984465,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17365 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 2338706, "tid": 2338706, + "ts": 6345939654189.414, "dur": 98.082, + "args": { + "External id": 984466,"Record function id": 0, "Ev Idx": 17366 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939654377.632, "dur": 54.440, + "args": { + "External id": 984467,"Record function id": 0, "Ev Idx": 17367 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 2338706, "tid": 2338706, + "ts": 6345939654443.794, "dur": 29899.436, + "args": { + "External id": 984468,"Record function id": 0, "Ev Idx": 17368 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338706, "tid": 2338706, + "ts": 6345939654454.638, "dur": 1155.994, + "args": { + "External id": 984469,"Record function id": 0, "Ev Idx": 17369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939654554.027, "dur": 11.339, + "args": { + "External id": 984470,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939654581.153, "dur": 39.679, + "args": { + "External id": 984471,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654588.264, "dur": 2.618, + "args": { + "External id": 984472,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654594.271, "dur": 0.966, + "args": { + "External id": 984473,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654597.064, "dur": 0.426, + "args": { + "External id": 984474,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654599.480, "dur": 0.717, + "args": { + "External id": 984475,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654601.508, "dur": 1.090, + "args": { + "External id": 984476,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654604.418, "dur": 0.764, + "args": { + "External id": 984477,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654606.605, "dur": 2.550, + "args": { + "External id": 984478,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654611.630, "dur": 0.647, + "args": { + "External id": 984479,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654613.758, "dur": 0.551, + "args": { + "External id": 984480,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939654634.434, "dur": 67.670, + "args": { + "External id": 984481,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939654740.889, "dur": 136.183, + "args": { + "External id": 984482,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939654754.927, "dur": 4.247, + "args": { + "External id": 984483,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939654764.957, "dur": 12.486, + "args": { + "External id": 984484,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939654770.298, "dur": 6.645, + "args": { + "External id": 984485,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654774.807, "dur": 0.697, + "args": { + "External id": 984486,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939654784.936, "dur": 29.067, + "args": { + "External id": 984487,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654787.562, "dur": 0.804, + "args": { + "External id": 984488,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654790.603, "dur": 0.731, + "args": { + "External id": 984489,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654792.743, "dur": 0.574, + "args": { + "External id": 984490,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654795.753, "dur": 2.703, + "args": { + "External id": 984491,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654799.682, "dur": 0.520, + "args": { + "External id": 984492,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654802.367, "dur": 0.620, + "args": { + "External id": 984493,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654804.670, "dur": 0.440, + "args": { + "External id": 984494,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654807.026, "dur": 0.512, + "args": { + "External id": 984495,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939654808.948, "dur": 0.501, + "args": { + "External id": 984496,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939654828.851, "dur": 39.569, + "args": { + "External id": 984497,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939654944.371, "dur": 545.857, + "args": { + "External id": 984498,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939654971.614, "dur": 511.598, + "args": { + "External id": 984499,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17399, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939654986.824, "dur": 488.246, + "args": { + "External id": 984500,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939655519.991, "dur": 3.089, + "args": { + "External id": 984501,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17401, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338706, "tid": 2338706, + "ts": 6345939655634.567, "dur": 28416.061, + "args": { + "External id": 984502,"Record function id": 0, "Ev Idx": 17402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939655752.762, "dur": 7.613, + "args": { + "External id": 984503,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939655764.072, "dur": 1.240, + "args": { + "External id": 984504,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939655767.300, "dur": 3.450, + "args": { + "External id": 984505,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939655772.754, "dur": 1.182, + "args": { + "External id": 984506,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939655776.004, "dur": 0.924, + "args": { + "External id": 984507,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939655778.320, "dur": 1.101, + "args": { + "External id": 984508,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939655781.351, "dur": 1.048, + "args": { + "External id": 984509,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939655784.905, "dur": 2.595, + "args": { + "External id": 984510,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939655789.114, "dur": 1.130, + "args": { + "External id": 984511,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939655792.021, "dur": 0.776, + "args": { + "External id": 984512,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939655812.904, "dur": 28169.065, + "args": { + "External id": 984513,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939655831.551, "dur": 28139.398, + "args": { + "External id": 984514,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939655849.977, "dur": 19.363, + "args": { + "External id": 984515,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939655873.534, "dur": 28052.859, + "args": { + "External id": 984516,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939655876.601, "dur": 28048.003, + "args": { + "External id": 984517,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939655881.404, "dur": 6.217, + "args": { + "External id": 984518,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939655889.590, "dur": 28031.287, + "args": { + "External id": 984519,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939684265.247, "dur": 42.279, + "args": { + "External id": 984520,"Sequence number": 10552494, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17420 + } + }, + { + "ph": "s", "id": 425, "pid": 2338706, "tid": 2338706, "ts": 6345939684265.247, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939684288.180, "dur": 12.563, + "args": { + "External id": 984521,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939684293.570, "dur": 6.757, + "args": { + "External id": 984522,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939684394.529, "dur": 82.915, + "args": { + "External id": 984523,"Record function id": 0, "Ev Idx": 17423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939684479.262, "dur": 1310.698, + "args": { + "External id": 984524,"Record function id": 0, "Ev Idx": 17424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939684524.420, "dur": 1250.324, + "args": { + "External id": 984525,"Sequence number": 10552495, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17425 + } + }, + { + "ph": "s", "id": 424, "pid": 2338706, "tid": 2338706, "ts": 6345939684524.420, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939684607.259, "dur": 60.227, + "args": { + "External id": 984526,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939684683.933, "dur": 120.436, + "args": { + "External id": 984527,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939684821.658, "dur": 43.109, + "args": { + "External id": 984528,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939684873.047, "dur": 35.896, + "args": { + "External id": 984529,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939684943.045, "dur": 31.839, + "args": { + "External id": 984530,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939684996.597, "dur": 43.684, + "args": { + "External id": 984531,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939685109.394, "dur": 164.355, + "args": { + "External id": 984532,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939685168.604, "dur": 16.258, + "args": { + "External id": 984533,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939685175.694, "dur": 8.091, + "args": { + "External id": 984534,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939685189.337, "dur": 5.042, + "args": { + "External id": 984535,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939685195.904, "dur": 1.443, + "args": { + "External id": 984536,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939685200.320, "dur": 7.310, + "args": { + "External id": 984537,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939685289.461, "dur": 70.426, + "args": { + "External id": 984538,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939685400.677, "dur": 38.076, + "args": { + "External id": 984539,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939685454.048, "dur": 50.725, + "args": { + "External id": 984540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939685513.462, "dur": 39.820, + "args": { + "External id": 984541,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939685576.985, "dur": 29.389, + "args": { + "External id": 984542,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939685615.245, "dur": 40.299, + "args": { + "External id": 984543,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939685674.813, "dur": 20.146, + "args": { + "External id": 984544,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17444 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 2338706, "tid": 2338706, + "ts": 6345939685863.989, "dur": 91.677, + "args": { + "External id": 984545,"Record function id": 0, "Ev Idx": 17445 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939686108.187, "dur": 58.748, + "args": { + "External id": 984546,"Record function id": 0, "Ev Idx": 17446 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 2338706, "tid": 2338706, + "ts": 6345939686178.763, "dur": 32356.749, + "args": { + "External id": 984547,"Record function id": 0, "Ev Idx": 17447 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338706, "tid": 2338706, + "ts": 6345939686187.962, "dur": 1158.974, + "args": { + "External id": 984548,"Record function id": 0, "Ev Idx": 17448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939686290.418, "dur": 12.392, + "args": { + "External id": 984549,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939686324.453, "dur": 47.059, + "args": { + "External id": 984550,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686331.808, "dur": 2.724, + "args": { + "External id": 984551,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686337.572, "dur": 0.859, + "args": { + "External id": 984552,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686340.730, "dur": 0.579, + "args": { + "External id": 984553,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686342.930, "dur": 0.541, + "args": { + "External id": 984554,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686346.099, "dur": 0.405, + "args": { + "External id": 984555,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686348.245, "dur": 0.728, + "args": { + "External id": 984556,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686351.281, "dur": 3.186, + "args": { + "External id": 984557,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686356.067, "dur": 0.439, + "args": { + "External id": 984558,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686358.787, "dur": 0.465, + "args": { + "External id": 984559,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939686391.293, "dur": 65.906, + "args": { + "External id": 984560,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939686495.765, "dur": 134.009, + "args": { + "External id": 984561,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939686510.114, "dur": 4.669, + "args": { + "External id": 984562,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939686521.028, "dur": 12.422, + "args": { + "External id": 984563,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939686525.974, "dur": 6.970, + "args": { + "External id": 984564,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686530.699, "dur": 0.800, + "args": { + "External id": 984565,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939686541.780, "dur": 29.629, + "args": { + "External id": 984566,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686544.474, "dur": 0.497, + "args": { + "External id": 984567,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686546.931, "dur": 0.455, + "args": { + "External id": 984568,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686549.416, "dur": 0.533, + "args": { + "External id": 984569,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686551.299, "dur": 2.900, + "args": { + "External id": 984570,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686556.488, "dur": 0.578, + "args": { + "External id": 984571,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686558.779, "dur": 0.442, + "args": { + "External id": 984572,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686561.494, "dur": 0.609, + "args": { + "External id": 984573,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686563.521, "dur": 0.458, + "args": { + "External id": 984574,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939686566.144, "dur": 0.631, + "args": { + "External id": 984575,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939686584.116, "dur": 35.981, + "args": { + "External id": 984576,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939686742.989, "dur": 480.696, + "args": { + "External id": 984577,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939686777.913, "dur": 438.985, + "args": { + "External id": 984578,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17478, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939686791.254, "dur": 417.442, + "args": { + "External id": 984579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939687252.756, "dur": 2.794, + "args": { + "External id": 984580,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17480, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338706, "tid": 2338706, + "ts": 6345939687371.837, "dur": 30886.601, + "args": { + "External id": 984581,"Record function id": 0, "Ev Idx": 17481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939687493.531, "dur": 7.580, + "args": { + "External id": 984582,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939687504.951, "dur": 1.079, + "args": { + "External id": 984583,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939687508.325, "dur": 3.629, + "args": { + "External id": 984584,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939687513.971, "dur": 0.948, + "args": { + "External id": 984585,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939687516.465, "dur": 1.030, + "args": { + "External id": 984586,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939687518.940, "dur": 1.398, + "args": { + "External id": 984587,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939687522.354, "dur": 1.055, + "args": { + "External id": 984588,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939687525.354, "dur": 2.043, + "args": { + "External id": 984589,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939687530.939, "dur": 0.838, + "args": { + "External id": 984590,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939687533.443, "dur": 0.725, + "args": { + "External id": 984591,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939687554.526, "dur": 30650.613, + "args": { + "External id": 984592,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939687573.642, "dur": 30621.395, + "args": { + "External id": 984593,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939687588.924, "dur": 19.074, + "args": { + "External id": 984594,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939687612.034, "dur": 30540.984, + "args": { + "External id": 984595,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939687615.014, "dur": 30536.376, + "args": { + "External id": 984596,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939687619.960, "dur": 6.319, + "args": { + "External id": 984597,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939687630.211, "dur": 30517.212, + "args": { + "External id": 984598,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939718454.128, "dur": 41.686, + "args": { + "External id": 984599,"Sequence number": 10552496, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17499 + } + }, + { + "ph": "s", "id": 423, "pid": 2338706, "tid": 2338706, "ts": 6345939718454.128, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939718476.990, "dur": 11.959, + "args": { + "External id": 984600,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939718482.160, "dur": 6.409, + "args": { + "External id": 984601,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939718588.118, "dur": 90.460, + "args": { + "External id": 984602,"Record function id": 0, "Ev Idx": 17502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939718680.099, "dur": 1360.440, + "args": { + "External id": 984603,"Record function id": 0, "Ev Idx": 17503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939718728.173, "dur": 1276.106, + "args": { + "External id": 984604,"Sequence number": 10552497, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17504 + } + }, + { + "ph": "s", "id": 422, "pid": 2338706, "tid": 2338706, "ts": 6345939718728.173, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939718813.179, "dur": 52.171, + "args": { + "External id": 984605,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939718880.576, "dur": 121.454, + "args": { + "External id": 984606,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939719043.354, "dur": 94.784, + "args": { + "External id": 984607,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939719152.519, "dur": 38.202, + "args": { + "External id": 984608,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939719228.201, "dur": 33.639, + "args": { + "External id": 984609,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939719287.887, "dur": 24.548, + "args": { + "External id": 984610,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939719340.307, "dur": 163.992, + "args": { + "External id": 984611,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939719399.951, "dur": 15.984, + "args": { + "External id": 984612,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939719407.489, "dur": 7.438, + "args": { + "External id": 984613,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939719420.355, "dur": 4.807, + "args": { + "External id": 984614,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939719426.692, "dur": 1.086, + "args": { + "External id": 984615,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939719430.526, "dur": 6.667, + "args": { + "External id": 984616,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939719519.834, "dur": 57.983, + "args": { + "External id": 984617,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939719618.427, "dur": 36.355, + "args": { + "External id": 984618,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939719667.242, "dur": 52.359, + "args": { + "External id": 984619,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939719728.622, "dur": 42.090, + "args": { + "External id": 984620,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939719794.090, "dur": 35.830, + "args": { + "External id": 984621,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939719838.158, "dur": 44.053, + "args": { + "External id": 984622,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939719902.113, "dur": 23.963, + "args": { + "External id": 984623,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17523 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 2338706, "tid": 2338706, + "ts": 6345939720156.812, "dur": 102.294, + "args": { + "External id": 984624,"Record function id": 0, "Ev Idx": 17524 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939720350.156, "dur": 53.847, + "args": { + "External id": 984625,"Record function id": 0, "Ev Idx": 17525 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 2338706, "tid": 2338706, + "ts": 6345939720415.513, "dur": 29760.142, + "args": { + "External id": 984626,"Record function id": 0, "Ev Idx": 17526 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338706, "tid": 2338706, + "ts": 6345939720425.594, "dur": 1128.550, + "args": { + "External id": 984627,"Record function id": 0, "Ev Idx": 17527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939720522.017, "dur": 11.574, + "args": { + "External id": 984628,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939720550.977, "dur": 40.088, + "args": { + "External id": 984629,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720558.237, "dur": 2.866, + "args": { + "External id": 984630,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720564.027, "dur": 0.586, + "args": { + "External id": 984631,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720566.785, "dur": 0.593, + "args": { + "External id": 984632,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720568.997, "dur": 0.489, + "args": { + "External id": 984633,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720571.523, "dur": 0.922, + "args": { + "External id": 984634,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720573.765, "dur": 0.775, + "args": { + "External id": 984635,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720576.486, "dur": 3.373, + "args": { + "External id": 984636,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720581.321, "dur": 0.684, + "args": { + "External id": 984637,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720583.771, "dur": 0.441, + "args": { + "External id": 984638,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939720604.805, "dur": 65.657, + "args": { + "External id": 984639,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939720710.480, "dur": 149.219, + "args": { + "External id": 984640,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939720725.072, "dur": 3.865, + "args": { + "External id": 984641,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939720735.146, "dur": 22.357, + "args": { + "External id": 984642,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939720740.430, "dur": 16.372, + "args": { + "External id": 984643,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720754.312, "dur": 0.738, + "args": { + "External id": 984644,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939720765.927, "dur": 28.133, + "args": { + "External id": 984645,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720768.472, "dur": 0.875, + "args": { + "External id": 984646,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720770.930, "dur": 0.538, + "args": { + "External id": 984647,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720772.881, "dur": 0.591, + "args": { + "External id": 984648,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720775.697, "dur": 3.040, + "args": { + "External id": 984649,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720780.398, "dur": 0.429, + "args": { + "External id": 984650,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720782.647, "dur": 0.379, + "args": { + "External id": 984651,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720784.440, "dur": 0.523, + "args": { + "External id": 984652,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720787.276, "dur": 0.386, + "args": { + "External id": 984653,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939720789.207, "dur": 0.406, + "args": { + "External id": 984654,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939720808.266, "dur": 41.597, + "args": { + "External id": 984655,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939720921.343, "dur": 514.050, + "args": { + "External id": 984656,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939720953.831, "dur": 474.704, + "args": { + "External id": 984657,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17557, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939720966.023, "dur": 455.130, + "args": { + "External id": 984658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939721463.217, "dur": 3.028, + "args": { + "External id": 984659,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17559, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338706, "tid": 2338706, + "ts": 6345939721580.039, "dur": 28299.833, + "args": { + "External id": 984660,"Record function id": 0, "Ev Idx": 17560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939721701.811, "dur": 7.933, + "args": { + "External id": 984661,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939721713.908, "dur": 1.114, + "args": { + "External id": 984662,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939721717.182, "dur": 3.480, + "args": { + "External id": 984663,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939721722.996, "dur": 0.940, + "args": { + "External id": 984664,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939721725.629, "dur": 0.996, + "args": { + "External id": 984665,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939721728.088, "dur": 1.125, + "args": { + "External id": 984666,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939721731.183, "dur": 1.132, + "args": { + "External id": 984667,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939721734.202, "dur": 2.473, + "args": { + "External id": 984668,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939721738.401, "dur": 1.002, + "args": { + "External id": 984669,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939721743.024, "dur": 1.064, + "args": { + "External id": 984670,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939721765.291, "dur": 28061.950, + "args": { + "External id": 984671,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939721784.649, "dur": 28032.113, + "args": { + "External id": 984672,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939721799.672, "dur": 19.069, + "args": { + "External id": 984673,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939721822.653, "dur": 27947.940, + "args": { + "External id": 984674,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939721825.570, "dur": 27943.791, + "args": { + "External id": 984675,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939721830.589, "dur": 6.641, + "args": { + "External id": 984676,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939721839.098, "dur": 27926.010, + "args": { + "External id": 984677,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939750099.420, "dur": 41.657, + "args": { + "External id": 984678,"Sequence number": 10552498, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17578 + } + }, + { + "ph": "s", "id": 421, "pid": 2338706, "tid": 2338706, "ts": 6345939750099.420, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939750122.002, "dur": 13.192, + "args": { + "External id": 984679,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939750127.680, "dur": 7.080, + "args": { + "External id": 984680,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939750224.627, "dur": 88.686, + "args": { + "External id": 984681,"Record function id": 0, "Ev Idx": 17581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939750315.085, "dur": 1332.899, + "args": { + "External id": 984682,"Record function id": 0, "Ev Idx": 17582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939750362.910, "dur": 1267.893, + "args": { + "External id": 984683,"Sequence number": 10552499, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17583 + } + }, + { + "ph": "s", "id": 420, "pid": 2338706, "tid": 2338706, "ts": 6345939750362.910, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939750451.090, "dur": 60.460, + "args": { + "External id": 984684,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939750527.744, "dur": 121.321, + "args": { + "External id": 984685,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939750664.846, "dur": 43.744, + "args": { + "External id": 984686,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939750721.037, "dur": 35.002, + "args": { + "External id": 984687,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939750785.739, "dur": 34.864, + "args": { + "External id": 984688,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939750845.304, "dur": 21.380, + "args": { + "External id": 984689,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939750891.436, "dur": 212.289, + "args": { + "External id": 984690,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939750946.361, "dur": 14.352, + "args": { + "External id": 984691,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939750953.144, "dur": 6.575, + "args": { + "External id": 984692,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939750963.858, "dur": 4.690, + "args": { + "External id": 984693,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939750970.106, "dur": 1.453, + "args": { + "External id": 984694,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939750974.712, "dur": 6.379, + "args": { + "External id": 984695,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939751121.265, "dur": 75.194, + "args": { + "External id": 984696,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939751234.826, "dur": 41.472, + "args": { + "External id": 984697,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939751290.739, "dur": 55.818, + "args": { + "External id": 984698,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939751355.712, "dur": 45.138, + "args": { + "External id": 984699,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939751425.073, "dur": 33.449, + "args": { + "External id": 984700,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939751467.176, "dur": 42.453, + "args": { + "External id": 984701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939751529.185, "dur": 22.730, + "args": { + "External id": 984702,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17602 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 2338706, "tid": 2338706, + "ts": 6345939751724.546, "dur": 91.456, + "args": { + "External id": 984703,"Record function id": 0, "Ev Idx": 17603 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939751905.773, "dur": 54.828, + "args": { + "External id": 984704,"Record function id": 0, "Ev Idx": 17604 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 2338706, "tid": 2338706, + "ts": 6345939751972.241, "dur": 31609.771, + "args": { + "External id": 984705,"Record function id": 0, "Ev Idx": 17605 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338706, "tid": 2338706, + "ts": 6345939751983.231, "dur": 1159.151, + "args": { + "External id": 984706,"Record function id": 0, "Ev Idx": 17606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939752156.064, "dur": 10.876, + "args": { + "External id": 984707,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939752183.605, "dur": 41.410, + "args": { + "External id": 984708,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752190.569, "dur": 2.713, + "args": { + "External id": 984709,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752196.565, "dur": 0.892, + "args": { + "External id": 984710,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752199.204, "dur": 0.577, + "args": { + "External id": 984711,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752201.706, "dur": 0.764, + "args": { + "External id": 984712,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752204.420, "dur": 0.785, + "args": { + "External id": 984713,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752207.185, "dur": 0.790, + "args": { + "External id": 984714,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752209.250, "dur": 3.105, + "args": { + "External id": 984715,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752214.462, "dur": 0.743, + "args": { + "External id": 984716,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752216.770, "dur": 0.732, + "args": { + "External id": 984717,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939752238.228, "dur": 67.261, + "args": { + "External id": 984718,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939752345.453, "dur": 139.937, + "args": { + "External id": 984719,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939752362.966, "dur": 4.296, + "args": { + "External id": 984720,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939752373.585, "dur": 13.350, + "args": { + "External id": 984721,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939752379.341, "dur": 7.099, + "args": { + "External id": 984722,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752383.964, "dur": 1.013, + "args": { + "External id": 984723,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939752395.134, "dur": 28.948, + "args": { + "External id": 984724,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752397.557, "dur": 0.659, + "args": { + "External id": 984725,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752400.343, "dur": 0.664, + "args": { + "External id": 984726,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752402.582, "dur": 0.474, + "args": { + "External id": 984727,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752405.100, "dur": 2.740, + "args": { + "External id": 984728,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752409.516, "dur": 0.433, + "args": { + "External id": 984729,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752412.000, "dur": 0.405, + "args": { + "External id": 984730,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752414.272, "dur": 0.658, + "args": { + "External id": 984731,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752416.989, "dur": 0.575, + "args": { + "External id": 984732,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939752419.021, "dur": 0.727, + "args": { + "External id": 984733,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939752436.836, "dur": 38.626, + "args": { + "External id": 984734,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939752547.273, "dur": 414.083, + "args": { + "External id": 984735,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939752578.956, "dur": 377.119, + "args": { + "External id": 984736,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17636, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939752590.880, "dur": 358.793, + "args": { + "External id": 984737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939752985.719, "dur": 3.066, + "args": { + "External id": 984738,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17638, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338706, "tid": 2338706, + "ts": 6345939753170.446, "dur": 30153.551, + "args": { + "External id": 984739,"Record function id": 0, "Ev Idx": 17639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939753291.973, "dur": 7.552, + "args": { + "External id": 984740,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939753304.099, "dur": 1.007, + "args": { + "External id": 984741,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939753307.024, "dur": 3.661, + "args": { + "External id": 984742,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939753312.798, "dur": 0.949, + "args": { + "External id": 984743,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939753315.380, "dur": 1.035, + "args": { + "External id": 984744,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939753318.034, "dur": 0.962, + "args": { + "External id": 984745,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939753320.680, "dur": 0.863, + "args": { + "External id": 984746,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939753324.090, "dur": 2.454, + "args": { + "External id": 984747,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939753328.265, "dur": 0.893, + "args": { + "External id": 984748,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939753331.174, "dur": 0.833, + "args": { + "External id": 984749,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939753353.730, "dur": 29919.881, + "args": { + "External id": 984750,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939753372.054, "dur": 29891.750, + "args": { + "External id": 984751,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939753390.965, "dur": 17.731, + "args": { + "External id": 984752,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939753412.601, "dur": 29809.882, + "args": { + "External id": 984753,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939753415.596, "dur": 29805.457, + "args": { + "External id": 984754,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939753419.976, "dur": 6.090, + "args": { + "External id": 984755,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939753427.763, "dur": 29789.553, + "args": { + "External id": 984756,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939783501.642, "dur": 44.380, + "args": { + "External id": 984757,"Sequence number": 10552500, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17657 + } + }, + { + "ph": "s", "id": 419, "pid": 2338706, "tid": 2338706, "ts": 6345939783501.642, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939783528.470, "dur": 11.151, + "args": { + "External id": 984758,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939783533.762, "dur": 5.650, + "args": { + "External id": 984759,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939783634.291, "dur": 87.600, + "args": { + "External id": 984760,"Record function id": 0, "Ev Idx": 17660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939783723.653, "dur": 1392.745, + "args": { + "External id": 984761,"Record function id": 0, "Ev Idx": 17661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939783773.242, "dur": 1324.500, + "args": { + "External id": 984762,"Sequence number": 10552501, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17662 + } + }, + { + "ph": "s", "id": 418, "pid": 2338706, "tid": 2338706, "ts": 6345939783773.242, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939783862.708, "dur": 62.296, + "args": { + "External id": 984763,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939783942.726, "dur": 177.958, + "args": { + "External id": 984764,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939784142.996, "dur": 50.198, + "args": { + "External id": 984765,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939784206.331, "dur": 38.275, + "args": { + "External id": 984766,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939784277.222, "dur": 35.805, + "args": { + "External id": 984767,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939784336.227, "dur": 22.379, + "args": { + "External id": 984768,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939784384.379, "dur": 161.496, + "args": { + "External id": 984769,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939784445.055, "dur": 15.398, + "args": { + "External id": 984770,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939784452.401, "dur": 7.133, + "args": { + "External id": 984771,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939784464.819, "dur": 4.729, + "args": { + "External id": 984772,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939784470.854, "dur": 1.503, + "args": { + "External id": 984773,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939784475.541, "dur": 6.227, + "args": { + "External id": 984774,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939784560.065, "dur": 57.780, + "args": { + "External id": 984775,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939784652.548, "dur": 37.107, + "args": { + "External id": 984776,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939784705.298, "dur": 56.250, + "args": { + "External id": 984777,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939784770.794, "dur": 42.725, + "args": { + "External id": 984778,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939784837.341, "dur": 32.841, + "args": { + "External id": 984779,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939784878.097, "dur": 42.202, + "args": { + "External id": 984780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939784939.651, "dur": 21.664, + "args": { + "External id": 984781,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17681 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 2338706, "tid": 2338706, + "ts": 6345939785193.477, "dur": 89.561, + "args": { + "External id": 984782,"Record function id": 0, "Ev Idx": 17682 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939785371.884, "dur": 53.619, + "args": { + "External id": 984783,"Record function id": 0, "Ev Idx": 17683 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 2338706, "tid": 2338706, + "ts": 6345939785436.799, "dur": 32681.032, + "args": { + "External id": 984784,"Record function id": 0, "Ev Idx": 17684 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338706, "tid": 2338706, + "ts": 6345939785448.073, "dur": 1089.968, + "args": { + "External id": 984785,"Record function id": 0, "Ev Idx": 17685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939785546.897, "dur": 11.524, + "args": { + "External id": 984786,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939785575.368, "dur": 44.572, + "args": { + "External id": 984787,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785585.890, "dur": 2.580, + "args": { + "External id": 984788,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785592.870, "dur": 0.733, + "args": { + "External id": 984789,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785595.213, "dur": 0.765, + "args": { + "External id": 984790,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785598.324, "dur": 0.501, + "args": { + "External id": 984791,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785600.907, "dur": 0.799, + "args": { + "External id": 984792,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785603.425, "dur": 0.687, + "args": { + "External id": 984793,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785605.524, "dur": 3.086, + "args": { + "External id": 984794,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785610.418, "dur": 0.699, + "args": { + "External id": 984795,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785612.641, "dur": 0.509, + "args": { + "External id": 984796,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939785632.559, "dur": 68.440, + "args": { + "External id": 984797,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939785739.701, "dur": 139.697, + "args": { + "External id": 984798,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939785754.404, "dur": 4.245, + "args": { + "External id": 984799,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939785764.774, "dur": 13.084, + "args": { + "External id": 984800,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939785770.437, "dur": 6.918, + "args": { + "External id": 984801,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785775.004, "dur": 0.932, + "args": { + "External id": 984802,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939785785.207, "dur": 29.712, + "args": { + "External id": 984803,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785787.952, "dur": 0.789, + "args": { + "External id": 984804,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785790.409, "dur": 0.861, + "args": { + "External id": 984805,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785792.857, "dur": 0.688, + "args": { + "External id": 984806,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785795.389, "dur": 3.171, + "args": { + "External id": 984807,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785800.313, "dur": 0.440, + "args": { + "External id": 984808,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785802.851, "dur": 0.740, + "args": { + "External id": 984809,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785805.089, "dur": 0.491, + "args": { + "External id": 984810,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785807.587, "dur": 0.659, + "args": { + "External id": 984811,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939785809.933, "dur": 0.642, + "args": { + "External id": 984812,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939785829.589, "dur": 40.802, + "args": { + "External id": 984813,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939785942.531, "dur": 476.637, + "args": { + "External id": 984814,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939785971.121, "dur": 441.324, + "args": { + "External id": 984815,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17715, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939785983.619, "dur": 421.772, + "args": { + "External id": 984816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939786447.566, "dur": 2.834, + "args": { + "External id": 984817,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17717, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338706, "tid": 2338706, + "ts": 6345939786561.940, "dur": 31236.762, + "args": { + "External id": 984818,"Record function id": 0, "Ev Idx": 17718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939786682.451, "dur": 7.176, + "args": { + "External id": 984819,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939786693.558, "dur": 1.171, + "args": { + "External id": 984820,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939786696.882, "dur": 3.464, + "args": { + "External id": 984821,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939786702.323, "dur": 1.091, + "args": { + "External id": 984822,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939786704.890, "dur": 1.327, + "args": { + "External id": 984823,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939786708.007, "dur": 1.034, + "args": { + "External id": 984824,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939786710.653, "dur": 0.907, + "args": { + "External id": 984825,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939786713.814, "dur": 2.511, + "args": { + "External id": 984826,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939786717.912, "dur": 1.011, + "args": { + "External id": 984827,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939786720.640, "dur": 0.941, + "args": { + "External id": 984828,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939786741.867, "dur": 31000.118, + "args": { + "External id": 984829,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939786760.195, "dur": 30971.551, + "args": { + "External id": 984830,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939786776.739, "dur": 19.077, + "args": { + "External id": 984831,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939786802.556, "dur": 30887.146, + "args": { + "External id": 984832,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939786805.587, "dur": 30882.251, + "args": { + "External id": 984833,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939786809.949, "dur": 6.687, + "args": { + "External id": 984834,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939786818.529, "dur": 30865.680, + "args": { + "External id": 984835,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939817984.609, "dur": 60.766, + "args": { + "External id": 984836,"Sequence number": 10552502, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17736 + } + }, + { + "ph": "s", "id": 417, "pid": 2338706, "tid": 2338706, "ts": 6345939817984.609, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939818024.809, "dur": 13.240, + "args": { + "External id": 984837,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939818030.663, "dur": 6.958, + "args": { + "External id": 984838,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939818178.154, "dur": 89.423, + "args": { + "External id": 984839,"Record function id": 0, "Ev Idx": 17739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939818269.375, "dur": 1352.624, + "args": { + "External id": 984840,"Record function id": 0, "Ev Idx": 17740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939818316.398, "dur": 1288.013, + "args": { + "External id": 984841,"Sequence number": 10552503, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17741 + } + }, + { + "ph": "s", "id": 416, "pid": 2338706, "tid": 2338706, "ts": 6345939818316.398, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939818411.238, "dur": 61.951, + "args": { + "External id": 984842,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939818490.703, "dur": 119.707, + "args": { + "External id": 984843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939818626.422, "dur": 44.989, + "args": { + "External id": 984844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939818684.496, "dur": 35.275, + "args": { + "External id": 984845,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939818747.407, "dur": 31.095, + "args": { + "External id": 984846,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939818801.873, "dur": 20.131, + "args": { + "External id": 984847,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939818846.847, "dur": 158.121, + "args": { + "External id": 984848,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939818904.472, "dur": 15.085, + "args": { + "External id": 984849,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939818911.325, "dur": 7.405, + "args": { + "External id": 984850,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939818923.938, "dur": 4.874, + "args": { + "External id": 984851,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939818930.500, "dur": 1.169, + "args": { + "External id": 984852,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939818934.642, "dur": 6.016, + "args": { + "External id": 984853,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939819042.146, "dur": 110.676, + "args": { + "External id": 984854,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939819198.484, "dur": 38.662, + "args": { + "External id": 984855,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939819250.878, "dur": 57.308, + "args": { + "External id": 984856,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939819321.257, "dur": 45.117, + "args": { + "External id": 984857,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939819391.547, "dur": 32.396, + "args": { + "External id": 984858,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939819432.278, "dur": 43.582, + "args": { + "External id": 984859,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939819496.742, "dur": 22.190, + "args": { + "External id": 984860,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17760 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 2338706, "tid": 2338706, + "ts": 6345939819696.771, "dur": 92.759, + "args": { + "External id": 984861,"Record function id": 0, "Ev Idx": 17761 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939819877.910, "dur": 52.922, + "args": { + "External id": 984862,"Record function id": 0, "Ev Idx": 17762 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 2338706, "tid": 2338706, + "ts": 6345939819941.779, "dur": 30819.011, + "args": { + "External id": 984863,"Record function id": 0, "Ev Idx": 17763 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338706, "tid": 2338706, + "ts": 6345939819951.837, "dur": 1164.295, + "args": { + "External id": 984864,"Record function id": 0, "Ev Idx": 17764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939820111.659, "dur": 12.097, + "args": { + "External id": 984865,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939820143.121, "dur": 40.553, + "args": { + "External id": 984866,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820150.317, "dur": 2.752, + "args": { + "External id": 984867,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820156.474, "dur": 0.803, + "args": { + "External id": 984868,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820158.817, "dur": 0.528, + "args": { + "External id": 984869,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820161.622, "dur": 0.820, + "args": { + "External id": 984870,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820163.857, "dur": 0.445, + "args": { + "External id": 984871,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820166.495, "dur": 0.732, + "args": { + "External id": 984872,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820168.836, "dur": 3.530, + "args": { + "External id": 984873,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820174.172, "dur": 0.499, + "args": { + "External id": 984874,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820176.124, "dur": 0.624, + "args": { + "External id": 984875,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939820197.399, "dur": 71.630, + "args": { + "External id": 984876,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939820311.031, "dur": 149.186, + "args": { + "External id": 984877,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939820325.769, "dur": 4.498, + "args": { + "External id": 984878,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939820338.008, "dur": 12.490, + "args": { + "External id": 984879,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939820343.430, "dur": 6.570, + "args": { + "External id": 984880,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820347.816, "dur": 0.901, + "args": { + "External id": 984881,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939820358.172, "dur": 39.646, + "args": { + "External id": 984882,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820360.609, "dur": 0.601, + "args": { + "External id": 984883,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820374.043, "dur": 0.608, + "args": { + "External id": 984884,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820376.385, "dur": 0.451, + "args": { + "External id": 984885,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820379.170, "dur": 2.937, + "args": { + "External id": 984886,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820383.412, "dur": 0.420, + "args": { + "External id": 984887,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820385.862, "dur": 0.675, + "args": { + "External id": 984888,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820388.379, "dur": 0.394, + "args": { + "External id": 984889,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820390.756, "dur": 0.507, + "args": { + "External id": 984890,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939820392.776, "dur": 0.396, + "args": { + "External id": 984891,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939820410.508, "dur": 39.502, + "args": { + "External id": 984892,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939820525.424, "dur": 424.459, + "args": { + "External id": 984893,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939820557.003, "dur": 387.747, + "args": { + "External id": 984894,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17794, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939820569.303, "dur": 369.241, + "args": { + "External id": 984895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939820973.124, "dur": 2.736, + "args": { + "External id": 984896,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17796, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338706, "tid": 2338706, + "ts": 6345939821144.196, "dur": 29353.686, + "args": { + "External id": 984897,"Record function id": 0, "Ev Idx": 17797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939821269.014, "dur": 7.583, + "args": { + "External id": 984898,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939821280.446, "dur": 1.046, + "args": { + "External id": 984899,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939821283.479, "dur": 3.370, + "args": { + "External id": 984900,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939821288.741, "dur": 0.865, + "args": { + "External id": 984901,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939821291.323, "dur": 0.950, + "args": { + "External id": 984902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939821293.730, "dur": 0.963, + "args": { + "External id": 984903,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939821296.432, "dur": 0.972, + "args": { + "External id": 984904,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939821299.808, "dur": 2.218, + "args": { + "External id": 984905,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939821304.187, "dur": 0.871, + "args": { + "External id": 984906,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939821306.880, "dur": 0.933, + "args": { + "External id": 984907,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939821327.484, "dur": 29118.697, + "args": { + "External id": 984908,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939821346.188, "dur": 29089.325, + "args": { + "External id": 984909,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939821363.038, "dur": 19.797, + "args": { + "External id": 984910,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939821389.501, "dur": 29002.838, + "args": { + "External id": 984911,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939821392.491, "dur": 28998.088, + "args": { + "External id": 984912,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939821396.572, "dur": 6.339, + "args": { + "External id": 984913,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939821405.210, "dur": 28981.704, + "args": { + "External id": 984914,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939850691.345, "dur": 41.014, + "args": { + "External id": 984915,"Sequence number": 10552504, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17815 + } + }, + { + "ph": "s", "id": 415, "pid": 2338706, "tid": 2338706, "ts": 6345939850691.345, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939850714.929, "dur": 12.011, + "args": { + "External id": 984916,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939850720.685, "dur": 5.990, + "args": { + "External id": 984917,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939850808.301, "dur": 87.955, + "args": { + "External id": 984918,"Record function id": 0, "Ev Idx": 17818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939850897.856, "dur": 1387.587, + "args": { + "External id": 984919,"Record function id": 0, "Ev Idx": 17819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939850941.111, "dur": 1326.508, + "args": { + "External id": 984920,"Sequence number": 10552505, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17820 + } + }, + { + "ph": "s", "id": 414, "pid": 2338706, "tid": 2338706, "ts": 6345939850941.111, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939851042.743, "dur": 95.179, + "args": { + "External id": 984921,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939851160.231, "dur": 119.375, + "args": { + "External id": 984922,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939851296.210, "dur": 43.012, + "args": { + "External id": 984923,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939851351.859, "dur": 38.209, + "args": { + "External id": 984924,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939851423.014, "dur": 33.171, + "args": { + "External id": 984925,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939851479.387, "dur": 22.921, + "args": { + "External id": 984926,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939851527.894, "dur": 161.529, + "args": { + "External id": 984927,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939851588.276, "dur": 14.536, + "args": { + "External id": 984928,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939851595.278, "dur": 6.558, + "args": { + "External id": 984929,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939851607.008, "dur": 5.593, + "args": { + "External id": 984930,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939851614.174, "dur": 1.374, + "args": { + "External id": 984931,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939851618.575, "dur": 6.389, + "args": { + "External id": 984932,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939851704.179, "dur": 58.199, + "args": { + "External id": 984933,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939851798.650, "dur": 42.163, + "args": { + "External id": 984934,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939851855.210, "dur": 54.387, + "args": { + "External id": 984935,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939851918.313, "dur": 41.452, + "args": { + "External id": 984936,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939851984.313, "dur": 51.192, + "args": { + "External id": 984937,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939852046.271, "dur": 88.921, + "args": { + "External id": 984938,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939852162.770, "dur": 25.009, + "args": { + "External id": 984939,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17839 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 2338706, "tid": 2338706, + "ts": 6345939852361.071, "dur": 94.703, + "args": { + "External id": 984940,"Record function id": 0, "Ev Idx": 17840 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939852546.107, "dur": 53.453, + "args": { + "External id": 984941,"Record function id": 0, "Ev Idx": 17841 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 2338706, "tid": 2338706, + "ts": 6345939852610.578, "dur": 31290.066, + "args": { + "External id": 984942,"Record function id": 0, "Ev Idx": 17842 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338706, "tid": 2338706, + "ts": 6345939852621.602, "dur": 1136.605, + "args": { + "External id": 984943,"Record function id": 0, "Ev Idx": 17843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939852720.689, "dur": 11.142, + "args": { + "External id": 984944,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939852748.385, "dur": 39.086, + "args": { + "External id": 984945,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852755.062, "dur": 2.693, + "args": { + "External id": 984946,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852760.958, "dur": 0.811, + "args": { + "External id": 984947,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852763.203, "dur": 0.543, + "args": { + "External id": 984948,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852765.591, "dur": 0.784, + "args": { + "External id": 984949,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852767.872, "dur": 0.530, + "args": { + "External id": 984950,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852770.264, "dur": 0.602, + "args": { + "External id": 984951,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852772.995, "dur": 2.726, + "args": { + "External id": 984952,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852777.683, "dur": 0.487, + "args": { + "External id": 984953,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852779.865, "dur": 0.508, + "args": { + "External id": 984954,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939852801.002, "dur": 63.154, + "args": { + "External id": 984955,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939852901.681, "dur": 202.499, + "args": { + "External id": 984956,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939852915.462, "dur": 4.087, + "args": { + "External id": 984957,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939852925.593, "dur": 12.597, + "args": { + "External id": 984958,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939852931.144, "dur": 6.505, + "args": { + "External id": 984959,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852935.454, "dur": 0.740, + "args": { + "External id": 984960,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939852945.814, "dur": 30.209, + "args": { + "External id": 984961,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852948.700, "dur": 0.714, + "args": { + "External id": 984962,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852951.661, "dur": 0.795, + "args": { + "External id": 984963,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852954.332, "dur": 0.662, + "args": { + "External id": 984964,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852957.062, "dur": 2.900, + "args": { + "External id": 984965,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852961.680, "dur": 0.853, + "args": { + "External id": 984966,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852964.475, "dur": 0.490, + "args": { + "External id": 984967,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852966.543, "dur": 0.373, + "args": { + "External id": 984968,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852969.390, "dur": 0.509, + "args": { + "External id": 984969,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939852971.463, "dur": 0.506, + "args": { + "External id": 984970,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939852990.496, "dur": 100.434, + "args": { + "External id": 984971,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939853173.521, "dur": 477.233, + "args": { + "External id": 984972,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939853202.141, "dur": 442.806, + "args": { + "External id": 984973,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17873, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939853214.683, "dur": 423.742, + "args": { + "External id": 984974,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939853674.563, "dur": 2.538, + "args": { + "External id": 984975,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17875, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338706, "tid": 2338706, + "ts": 6345939853781.632, "dur": 29871.343, + "args": { + "External id": 984976,"Record function id": 0, "Ev Idx": 17876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939853899.516, "dur": 7.511, + "args": { + "External id": 984977,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939853911.325, "dur": 1.634, + "args": { + "External id": 984978,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939853914.966, "dur": 4.210, + "args": { + "External id": 984979,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939853921.038, "dur": 1.095, + "args": { + "External id": 984980,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939853923.752, "dur": 1.147, + "args": { + "External id": 984981,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939853926.230, "dur": 1.195, + "args": { + "External id": 984982,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939853929.080, "dur": 0.936, + "args": { + "External id": 984983,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939853932.060, "dur": 2.339, + "args": { + "External id": 984984,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939853936.146, "dur": 0.919, + "args": { + "External id": 984985,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939853938.623, "dur": 1.177, + "args": { + "External id": 984986,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939853959.973, "dur": 29637.969, + "args": { + "External id": 984987,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939853982.032, "dur": 29605.537, + "args": { + "External id": 984988,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939853998.336, "dur": 40.446, + "args": { + "External id": 984989,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939854047.197, "dur": 29498.279, + "args": { + "External id": 984990,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939854050.287, "dur": 29493.475, + "args": { + "External id": 984991,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939854055.103, "dur": 44.399, + "args": { + "External id": 984992,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939854103.973, "dur": 29436.363, + "args": { + "External id": 984993,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939883834.346, "dur": 39.045, + "args": { + "External id": 984994,"Sequence number": 10552506, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17894 + } + }, + { + "ph": "s", "id": 413, "pid": 2338706, "tid": 2338706, "ts": 6345939883834.346, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939883855.964, "dur": 11.942, + "args": { + "External id": 984995,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939883861.532, "dur": 6.106, + "args": { + "External id": 984996,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939883947.880, "dur": 101.786, + "args": { + "External id": 984997,"Record function id": 0, "Ev Idx": 17897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939884084.119, "dur": 1334.863, + "args": { + "External id": 984998,"Record function id": 0, "Ev Idx": 17898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939884133.449, "dur": 1268.686, + "args": { + "External id": 984999,"Sequence number": 10552507, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17899 + } + }, + { + "ph": "s", "id": 412, "pid": 2338706, "tid": 2338706, "ts": 6345939884133.449, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939884218.965, "dur": 57.469, + "args": { + "External id": 985000,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939884293.375, "dur": 117.772, + "args": { + "External id": 985001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939884426.966, "dur": 42.421, + "args": { + "External id": 985002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939884477.799, "dur": 42.168, + "args": { + "External id": 985003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939884557.212, "dur": 34.821, + "args": { + "External id": 985004,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939884615.578, "dur": 22.088, + "args": { + "External id": 985005,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939884669.696, "dur": 157.077, + "args": { + "External id": 985006,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939884727.302, "dur": 15.214, + "args": { + "External id": 985007,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939884734.362, "dur": 7.063, + "args": { + "External id": 985008,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939884747.001, "dur": 5.434, + "args": { + "External id": 985009,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939884753.910, "dur": 1.518, + "args": { + "External id": 985010,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939884758.403, "dur": 6.850, + "args": { + "External id": 985011,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939884841.768, "dur": 54.131, + "args": { + "External id": 985012,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939884928.234, "dur": 36.096, + "args": { + "External id": 985013,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939884976.263, "dur": 111.998, + "args": { + "External id": 985014,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939885107.961, "dur": 53.293, + "args": { + "External id": 985015,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939885187.041, "dur": 36.657, + "args": { + "External id": 985016,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939885231.774, "dur": 46.357, + "args": { + "External id": 985017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939885297.438, "dur": 23.609, + "args": { + "External id": 985018,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17918 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 2338706, "tid": 2338706, + "ts": 6345939885495.396, "dur": 90.596, + "args": { + "External id": 985019,"Record function id": 0, "Ev Idx": 17919 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939885676.171, "dur": 53.922, + "args": { + "External id": 985020,"Record function id": 0, "Ev Idx": 17920 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 2338706, "tid": 2338706, + "ts": 6345939885741.521, "dur": 32047.277, + "args": { + "External id": 985021,"Record function id": 0, "Ev Idx": 17921 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338706, "tid": 2338706, + "ts": 6345939885751.533, "dur": 1084.640, + "args": { + "External id": 985022,"Record function id": 0, "Ev Idx": 17922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939885851.617, "dur": 9.699, + "args": { + "External id": 985023,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939885876.066, "dur": 39.813, + "args": { + "External id": 985024,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939885882.778, "dur": 2.421, + "args": { + "External id": 985025,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939885888.403, "dur": 0.640, + "args": { + "External id": 985026,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939885890.826, "dur": 0.600, + "args": { + "External id": 985027,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939885893.833, "dur": 0.700, + "args": { + "External id": 985028,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939885896.025, "dur": 0.645, + "args": { + "External id": 985029,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939885898.383, "dur": 0.594, + "args": { + "External id": 985030,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939885900.644, "dur": 3.110, + "args": { + "External id": 985031,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939885906.053, "dur": 0.704, + "args": { + "External id": 985032,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939885908.391, "dur": 0.472, + "args": { + "External id": 985033,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939885929.636, "dur": 63.261, + "args": { + "External id": 985034,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939886094.678, "dur": 158.153, + "args": { + "External id": 985035,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939886111.644, "dur": 6.650, + "args": { + "External id": 985036,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939886124.974, "dur": 14.439, + "args": { + "External id": 985037,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939886130.461, "dur": 8.476, + "args": { + "External id": 985038,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939886135.649, "dur": 1.182, + "args": { + "External id": 985039,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939886148.748, "dur": 36.384, + "args": { + "External id": 985040,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939886151.932, "dur": 0.822, + "args": { + "External id": 985041,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939886154.945, "dur": 0.593, + "args": { + "External id": 985042,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939886157.044, "dur": 0.640, + "args": { + "External id": 985043,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939886160.025, "dur": 3.019, + "args": { + "External id": 985044,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939886164.384, "dur": 0.445, + "args": { + "External id": 985045,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939886167.113, "dur": 0.571, + "args": { + "External id": 985046,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939886169.084, "dur": 0.649, + "args": { + "External id": 985047,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939886176.714, "dur": 0.423, + "args": { + "External id": 985048,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939886178.766, "dur": 0.603, + "args": { + "External id": 985049,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939886199.855, "dur": 42.163, + "args": { + "External id": 985050,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939886319.691, "dur": 412.303, + "args": { + "External id": 985051,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939886350.365, "dur": 376.181, + "args": { + "External id": 985052,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17952, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939886363.864, "dur": 356.830, + "args": { + "External id": 985053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939886755.292, "dur": 2.864, + "args": { + "External id": 985054,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17954, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338706, "tid": 2338706, + "ts": 6345939886858.921, "dur": 30665.317, + "args": { + "External id": 985055,"Record function id": 0, "Ev Idx": 17955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939886974.800, "dur": 6.166, + "args": { + "External id": 985056,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939886984.939, "dur": 1.080, + "args": { + "External id": 985057,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939886987.982, "dur": 3.976, + "args": { + "External id": 985058,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939886993.981, "dur": 1.033, + "args": { + "External id": 985059,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939886996.954, "dur": 1.230, + "args": { + "External id": 985060,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939886999.593, "dur": 0.922, + "args": { + "External id": 985061,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939887002.256, "dur": 1.492, + "args": { + "External id": 985062,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939887005.989, "dur": 24.840, + "args": { + "External id": 985063,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939887036.849, "dur": 1.469, + "args": { + "External id": 985064,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939887040.353, "dur": 1.014, + "args": { + "External id": 985065,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939887101.443, "dur": 30366.827, + "args": { + "External id": 985066,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939887121.589, "dur": 30336.642, + "args": { + "External id": 985067,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939887138.565, "dur": 20.154, + "args": { + "External id": 985068,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939887165.376, "dur": 30250.410, + "args": { + "External id": 985069,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939887168.527, "dur": 30246.545, + "args": { + "External id": 985070,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939887172.894, "dur": 7.580, + "args": { + "External id": 985071,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939887182.250, "dur": 30228.150, + "args": { + "External id": 985072,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939917710.054, "dur": 44.408, + "args": { + "External id": 985073,"Sequence number": 10552508, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17973 + } + }, + { + "ph": "s", "id": 411, "pid": 2338706, "tid": 2338706, "ts": 6345939917710.054, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939917735.945, "dur": 11.662, + "args": { + "External id": 985074,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939917741.411, "dur": 5.931, + "args": { + "External id": 985075,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939917842.181, "dur": 86.116, + "args": { + "External id": 985076,"Record function id": 0, "Ev Idx": 17976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939917929.928, "dur": 1388.391, + "args": { + "External id": 985077,"Record function id": 0, "Ev Idx": 17977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939917980.577, "dur": 1320.768, + "args": { + "External id": 985078,"Sequence number": 10552509, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17978 + } + }, + { + "ph": "s", "id": 410, "pid": 2338706, "tid": 2338706, "ts": 6345939917980.577, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939918126.505, "dur": 58.992, + "args": { + "External id": 985079,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939918205.888, "dur": 118.100, + "args": { + "External id": 985080,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939918340.710, "dur": 42.675, + "args": { + "External id": 985081,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939918391.436, "dur": 35.290, + "args": { + "External id": 985082,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939918460.968, "dur": 31.839, + "args": { + "External id": 985083,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939918516.004, "dur": 21.449, + "args": { + "External id": 985084,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939918562.150, "dur": 171.812, + "args": { + "External id": 985085,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939918618.468, "dur": 13.394, + "args": { + "External id": 985086,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939918624.960, "dur": 5.859, + "args": { + "External id": 985087,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939918635.043, "dur": 5.412, + "args": { + "External id": 985088,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939918662.607, "dur": 1.410, + "args": { + "External id": 985089,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939918667.150, "dur": 6.769, + "args": { + "External id": 985090,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939918748.022, "dur": 55.004, + "args": { + "External id": 985091,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939918837.059, "dur": 36.952, + "args": { + "External id": 985092,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939918886.799, "dur": 51.318, + "args": { + "External id": 985093,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939918950.471, "dur": 40.860, + "args": { + "External id": 985094,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939919039.978, "dur": 74.576, + "args": { + "External id": 985095,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939919125.665, "dur": 49.113, + "args": { + "External id": 985096,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939919197.605, "dur": 22.734, + "args": { + "External id": 985097,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17997 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 2338706, "tid": 2338706, + "ts": 6345939919394.498, "dur": 95.365, + "args": { + "External id": 985098,"Record function id": 0, "Ev Idx": 17998 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939919578.023, "dur": 50.783, + "args": { + "External id": 985099,"Record function id": 0, "Ev Idx": 17999 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 2338706, "tid": 2338706, + "ts": 6345939919639.778, "dur": 31548.374, + "args": { + "External id": 985100,"Record function id": 0, "Ev Idx": 18000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338706, "tid": 2338706, + "ts": 6345939919649.277, "dur": 1113.806, + "args": { + "External id": 985101,"Record function id": 0, "Ev Idx": 18001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939919747.964, "dur": 11.134, + "args": { + "External id": 985102,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939919775.947, "dur": 42.218, + "args": { + "External id": 985103,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939919783.056, "dur": 2.471, + "args": { + "External id": 985104,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939919789.330, "dur": 0.760, + "args": { + "External id": 985105,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939919791.965, "dur": 0.631, + "args": { + "External id": 985106,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939919794.382, "dur": 0.684, + "args": { + "External id": 985107,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939919796.578, "dur": 0.660, + "args": { + "External id": 985108,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939919799.513, "dur": 0.700, + "args": { + "External id": 985109,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939919802.187, "dur": 3.412, + "args": { + "External id": 985110,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939919807.762, "dur": 0.468, + "args": { + "External id": 985111,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939919810.034, "dur": 0.466, + "args": { + "External id": 985112,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939919831.474, "dur": 59.944, + "args": { + "External id": 985113,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939919931.150, "dur": 223.078, + "args": { + "External id": 985114,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939919945.946, "dur": 3.828, + "args": { + "External id": 985115,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939919956.136, "dur": 12.666, + "args": { + "External id": 985116,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939919961.459, "dur": 6.815, + "args": { + "External id": 985117,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939919965.921, "dur": 0.866, + "args": { + "External id": 985118,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939919977.232, "dur": 62.211, + "args": { + "External id": 985119,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939919979.746, "dur": 0.661, + "args": { + "External id": 985120,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939919982.387, "dur": 0.507, + "args": { + "External id": 985121,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939919993.810, "dur": 0.486, + "args": { + "External id": 985122,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939919996.625, "dur": 3.167, + "args": { + "External id": 985123,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939920001.588, "dur": 0.514, + "args": { + "External id": 985124,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939920003.943, "dur": 0.502, + "args": { + "External id": 985125,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939920005.922, "dur": 0.590, + "args": { + "External id": 985126,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939920029.416, "dur": 0.875, + "args": { + "External id": 985127,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939920033.355, "dur": 0.548, + "args": { + "External id": 985128,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939920094.072, "dur": 48.071, + "args": { + "External id": 985129,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939920222.262, "dur": 432.505, + "args": { + "External id": 985130,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939920254.915, "dur": 394.446, + "args": { + "External id": 985131,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18031, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939920267.094, "dur": 374.177, + "args": { + "External id": 985132,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939920678.648, "dur": 2.591, + "args": { + "External id": 985133,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18033, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338706, "tid": 2338706, + "ts": 6345939920787.147, "dur": 30074.808, + "args": { + "External id": 985134,"Record function id": 0, "Ev Idx": 18034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939920906.633, "dur": 6.999, + "args": { + "External id": 985135,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939920922.050, "dur": 1.506, + "args": { + "External id": 985136,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939920925.546, "dur": 3.708, + "args": { + "External id": 985137,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939920933.267, "dur": 0.959, + "args": { + "External id": 985138,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939920935.999, "dur": 1.048, + "args": { + "External id": 985139,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939920938.510, "dur": 1.391, + "args": { + "External id": 985140,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939920941.430, "dur": 1.142, + "args": { + "External id": 985141,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939920946.484, "dur": 3.423, + "args": { + "External id": 985142,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939920951.729, "dur": 0.779, + "args": { + "External id": 985143,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939920954.123, "dur": 0.842, + "args": { + "External id": 985144,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939920974.553, "dur": 29825.418, + "args": { + "External id": 985145,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939920993.218, "dur": 29795.629, + "args": { + "External id": 985146,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939921100.069, "dur": 21.436, + "args": { + "External id": 985147,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939921129.896, "dur": 29612.528, + "args": { + "External id": 985148,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939921133.260, "dur": 29607.450, + "args": { + "External id": 985149,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939921137.700, "dur": 7.590, + "args": { + "External id": 985150,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939921147.340, "dur": 29589.726, + "args": { + "External id": 985151,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939951108.416, "dur": 44.340, + "args": { + "External id": 985152,"Sequence number": 10552510, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18052 + } + }, + { + "ph": "s", "id": 409, "pid": 2338706, "tid": 2338706, "ts": 6345939951108.416, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939951132.915, "dur": 12.925, + "args": { + "External id": 985153,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939951138.712, "dur": 6.737, + "args": { + "External id": 985154,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939951236.809, "dur": 87.794, + "args": { + "External id": 985155,"Record function id": 0, "Ev Idx": 18055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939951326.128, "dur": 1324.512, + "args": { + "External id": 985156,"Record function id": 0, "Ev Idx": 18056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939951370.500, "dur": 1262.732, + "args": { + "External id": 985157,"Sequence number": 10552511, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18057 + } + }, + { + "ph": "s", "id": 408, "pid": 2338706, "tid": 2338706, "ts": 6345939951370.500, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939951453.599, "dur": 58.602, + "args": { + "External id": 985158,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939951528.882, "dur": 122.849, + "args": { + "External id": 985159,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939951668.111, "dur": 45.502, + "args": { + "External id": 985160,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939951722.688, "dur": 35.276, + "args": { + "External id": 985161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939951790.490, "dur": 32.283, + "args": { + "External id": 985162,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939951844.906, "dur": 21.071, + "args": { + "External id": 985163,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939951890.525, "dur": 228.985, + "args": { + "External id": 985164,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939951952.831, "dur": 15.618, + "args": { + "External id": 985165,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939951960.155, "dur": 7.130, + "args": { + "External id": 985166,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939951972.882, "dur": 5.160, + "args": { + "External id": 985167,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939951979.545, "dur": 1.334, + "args": { + "External id": 985168,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939951983.829, "dur": 5.970, + "args": { + "External id": 985169,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939952137.839, "dur": 71.614, + "args": { + "External id": 985170,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939952246.275, "dur": 38.589, + "args": { + "External id": 985171,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939952297.899, "dur": 50.980, + "args": { + "External id": 985172,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939952360.710, "dur": 41.960, + "args": { + "External id": 985173,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939952427.804, "dur": 33.296, + "args": { + "External id": 985174,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939952469.115, "dur": 42.575, + "args": { + "External id": 985175,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939952531.435, "dur": 23.119, + "args": { + "External id": 985176,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18076 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 2338706, "tid": 2338706, + "ts": 6345939952725.689, "dur": 89.577, + "args": { + "External id": 985177,"Record function id": 0, "Ev Idx": 18077 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939952903.183, "dur": 54.363, + "args": { + "External id": 985178,"Record function id": 0, "Ev Idx": 18078 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.20)", "pid": 2338706, "tid": 2338706, + "ts": 6345939952967.795, "dur": 32256.162, + "args": { + "External id": 985179,"Record function id": 0, "Ev Idx": 18079 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338706, "tid": 2338706, + "ts": 6345939952978.152, "dur": 1151.694, + "args": { + "External id": 985180,"Record function id": 0, "Ev Idx": 18080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939953137.511, "dur": 12.009, + "args": { + "External id": 985181,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939953166.787, "dur": 42.576, + "args": { + "External id": 985182,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953173.682, "dur": 2.487, + "args": { + "External id": 985183,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953179.448, "dur": 0.789, + "args": { + "External id": 985184,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953182.340, "dur": 0.539, + "args": { + "External id": 985185,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953184.926, "dur": 0.949, + "args": { + "External id": 985186,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953187.311, "dur": 0.587, + "args": { + "External id": 985187,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953190.108, "dur": 0.717, + "args": { + "External id": 985188,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953192.095, "dur": 2.768, + "args": { + "External id": 985189,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953196.951, "dur": 0.511, + "args": { + "External id": 985190,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953199.130, "dur": 0.516, + "args": { + "External id": 985191,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939953224.216, "dur": 69.743, + "args": { + "External id": 985192,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939953334.385, "dur": 137.355, + "args": { + "External id": 985193,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939953347.929, "dur": 4.280, + "args": { + "External id": 985194,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939953358.705, "dur": 12.966, + "args": { + "External id": 985195,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939953364.191, "dur": 6.991, + "args": { + "External id": 985196,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953368.889, "dur": 0.873, + "args": { + "External id": 985197,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939953379.561, "dur": 30.877, + "args": { + "External id": 985198,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953382.026, "dur": 0.576, + "args": { + "External id": 985199,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953384.639, "dur": 0.761, + "args": { + "External id": 985200,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953387.217, "dur": 0.669, + "args": { + "External id": 985201,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953389.981, "dur": 2.925, + "args": { + "External id": 985202,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953394.511, "dur": 0.486, + "args": { + "External id": 985203,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953396.707, "dur": 0.571, + "args": { + "External id": 985204,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953398.936, "dur": 0.583, + "args": { + "External id": 985205,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953401.700, "dur": 0.624, + "args": { + "External id": 985206,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939953403.691, "dur": 0.425, + "args": { + "External id": 985207,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939953423.203, "dur": 38.591, + "args": { + "External id": 985208,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939953545.356, "dur": 412.995, + "args": { + "External id": 985209,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939953579.884, "dur": 373.226, + "args": { + "External id": 985210,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18110, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939953592.277, "dur": 354.865, + "args": { + "External id": 985211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939953982.029, "dur": 2.710, + "args": { + "External id": 985212,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18112, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338706, "tid": 2338706, + "ts": 6345939954157.755, "dur": 30753.014, + "args": { + "External id": 985213,"Record function id": 0, "Ev Idx": 18113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939954276.782, "dur": 7.841, + "args": { + "External id": 985214,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939954288.666, "dur": 0.920, + "args": { + "External id": 985215,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939954291.700, "dur": 3.439, + "args": { + "External id": 985216,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939954296.972, "dur": 1.184, + "args": { + "External id": 985217,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939954300.032, "dur": 0.933, + "args": { + "External id": 985218,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939954302.582, "dur": 1.253, + "args": { + "External id": 985219,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939954305.372, "dur": 1.363, + "args": { + "External id": 985220,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939954308.912, "dur": 3.301, + "args": { + "External id": 985221,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939954313.685, "dur": 1.000, + "args": { + "External id": 985222,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939954316.914, "dur": 1.038, + "args": { + "External id": 985223,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939954341.436, "dur": 30515.214, + "args": { + "External id": 985224,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939954361.085, "dur": 30485.820, + "args": { + "External id": 985225,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939954380.735, "dur": 20.247, + "args": { + "External id": 985226,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939954405.318, "dur": 30399.361, + "args": { + "External id": 985227,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939954408.288, "dur": 30394.658, + "args": { + "External id": 985228,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939954412.780, "dur": 6.821, + "args": { + "External id": 985229,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939954421.785, "dur": 30377.500, + "args": { + "External id": 985230,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939985143.829, "dur": 45.191, + "args": { + "External id": 985231,"Sequence number": 10552512, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18131 + } + }, + { + "ph": "s", "id": 407, "pid": 2338706, "tid": 2338706, "ts": 6345939985143.829, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345939985169.348, "dur": 12.968, + "args": { + "External id": 985232,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939985174.921, "dur": 6.937, + "args": { + "External id": 985233,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345939985281.611, "dur": 86.239, + "args": { + "External id": 985234,"Record function id": 0, "Ev Idx": 18134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345939985369.451, "dur": 1335.862, + "args": { + "External id": 985235,"Record function id": 0, "Ev Idx": 18135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345939985415.474, "dur": 1272.695, + "args": { + "External id": 985236,"Sequence number": 10552513, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18136 + } + }, + { + "ph": "s", "id": 406, "pid": 2338706, "tid": 2338706, "ts": 6345939985415.474, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939985502.072, "dur": 58.990, + "args": { + "External id": 985237,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939985577.009, "dur": 122.960, + "args": { + "External id": 985238,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939985716.065, "dur": 41.648, + "args": { + "External id": 985239,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939985766.157, "dur": 35.651, + "args": { + "External id": 985240,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939985835.979, "dur": 31.307, + "args": { + "External id": 985241,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345939985890.702, "dur": 22.342, + "args": { + "External id": 985242,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345939985936.829, "dur": 231.081, + "args": { + "External id": 985243,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345939985996.464, "dur": 38.112, + "args": { + "External id": 985244,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939986003.289, "dur": 28.826, + "args": { + "External id": 985245,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939986038.982, "dur": 5.394, + "args": { + "External id": 985246,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939986045.820, "dur": 1.252, + "args": { + "External id": 985247,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939986050.456, "dur": 46.868, + "args": { + "External id": 985248,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939986183.884, "dur": 66.592, + "args": { + "External id": 985249,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345939986290.893, "dur": 37.960, + "args": { + "External id": 985250,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939986341.112, "dur": 55.558, + "args": { + "External id": 985251,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939986409.423, "dur": 43.725, + "args": { + "External id": 985252,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345939986478.550, "dur": 35.747, + "args": { + "External id": 985253,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345939986522.417, "dur": 44.157, + "args": { + "External id": 985254,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345939986586.723, "dur": 24.119, + "args": { + "External id": 985255,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18155 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.20)", "pid": 2338706, "tid": 2338706, + "ts": 6345939986778.070, "dur": 91.896, + "args": { + "External id": 985256,"Record function id": 0, "Ev Idx": 18156 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345939986958.271, "dur": 77.848, + "args": { + "External id": 985257,"Record function id": 0, "Ev Idx": 18157 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.21)", "pid": 2338706, "tid": 2338706, + "ts": 6345939987049.324, "dur": 31707.018, + "args": { + "External id": 985258,"Record function id": 0, "Ev Idx": 18158 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338706, "tid": 2338706, + "ts": 6345939987105.498, "dur": 1108.805, + "args": { + "External id": 985259,"Record function id": 0, "Ev Idx": 18159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939987210.733, "dur": 12.332, + "args": { + "External id": 985260,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939987241.575, "dur": 42.903, + "args": { + "External id": 985261,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987249.021, "dur": 2.621, + "args": { + "External id": 985262,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987254.734, "dur": 0.765, + "args": { + "External id": 985263,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987257.359, "dur": 0.436, + "args": { + "External id": 985264,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987259.970, "dur": 0.789, + "args": { + "External id": 985265,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987262.793, "dur": 0.587, + "args": { + "External id": 985266,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987265.313, "dur": 0.770, + "args": { + "External id": 985267,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987267.929, "dur": 3.051, + "args": { + "External id": 985268,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987273.399, "dur": 0.490, + "args": { + "External id": 985269,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987275.083, "dur": 0.465, + "args": { + "External id": 985270,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939987299.289, "dur": 69.054, + "args": { + "External id": 985271,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345939987407.836, "dur": 146.967, + "args": { + "External id": 985272,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939987422.675, "dur": 4.332, + "args": { + "External id": 985273,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345939987432.941, "dur": 12.846, + "args": { + "External id": 985274,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345939987438.559, "dur": 6.740, + "args": { + "External id": 985275,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987443.029, "dur": 0.828, + "args": { + "External id": 985276,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345939987453.760, "dur": 35.079, + "args": { + "External id": 985277,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987456.489, "dur": 0.657, + "args": { + "External id": 985278,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987458.786, "dur": 0.545, + "args": { + "External id": 985279,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987466.827, "dur": 0.600, + "args": { + "External id": 985280,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987469.232, "dur": 3.145, + "args": { + "External id": 985281,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987474.319, "dur": 0.567, + "args": { + "External id": 985282,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987476.805, "dur": 0.454, + "args": { + "External id": 985283,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987478.850, "dur": 0.508, + "args": { + "External id": 985284,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987481.234, "dur": 0.623, + "args": { + "External id": 985285,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939987483.194, "dur": 0.380, + "args": { + "External id": 985286,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939987503.233, "dur": 41.265, + "args": { + "External id": 985287,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345939987616.314, "dur": 474.114, + "args": { + "External id": 985288,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939987648.194, "dur": 401.600, + "args": { + "External id": 985289,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18189, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345939987660.294, "dur": 382.595, + "args": { + "External id": 985290,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345939988122.045, "dur": 4.144, + "args": { + "External id": 985291,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18191, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338706, "tid": 2338706, + "ts": 6345939988238.810, "dur": 30266.009, + "args": { + "External id": 985292,"Record function id": 0, "Ev Idx": 18192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939988356.943, "dur": 7.637, + "args": { + "External id": 985293,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939988368.279, "dur": 1.210, + "args": { + "External id": 985294,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939988371.349, "dur": 3.815, + "args": { + "External id": 985295,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939988377.006, "dur": 0.959, + "args": { + "External id": 985296,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939988379.403, "dur": 1.359, + "args": { + "External id": 985297,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939988382.409, "dur": 0.938, + "args": { + "External id": 985298,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939988385.184, "dur": 1.203, + "args": { + "External id": 985299,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939988388.920, "dur": 2.286, + "args": { + "External id": 985300,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939988392.764, "dur": 0.797, + "args": { + "External id": 985301,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345939988395.428, "dur": 1.114, + "args": { + "External id": 985302,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939988417.551, "dur": 30023.240, + "args": { + "External id": 985303,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939988435.722, "dur": 29992.439, + "args": { + "External id": 985304,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345939988454.335, "dur": 20.095, + "args": { + "External id": 985305,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345939988482.045, "dur": 29897.983, + "args": { + "External id": 985306,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345939988485.313, "dur": 29893.864, + "args": { + "External id": 985307,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345939988489.530, "dur": 6.949, + "args": { + "External id": 985308,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345939988498.397, "dur": 29875.747, + "args": { + "External id": 985309,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940018689.286, "dur": 38.109, + "args": { + "External id": 985310,"Sequence number": 10552514, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18210 + } + }, + { + "ph": "s", "id": 405, "pid": 2338706, "tid": 2338706, "ts": 6345940018689.286, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345940018709.421, "dur": 12.428, + "args": { + "External id": 985311,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940018715.062, "dur": 6.544, + "args": { + "External id": 985312,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345940018803.841, "dur": 88.690, + "args": { + "External id": 985313,"Record function id": 0, "Ev Idx": 18213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345940018893.925, "dur": 1405.902, + "args": { + "External id": 985314,"Record function id": 0, "Ev Idx": 18214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940018938.474, "dur": 1343.529, + "args": { + "External id": 985315,"Sequence number": 10552515, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18215 + } + }, + { + "ph": "s", "id": 404, "pid": 2338706, "tid": 2338706, "ts": 6345940018938.474, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345940019036.177, "dur": 100.680, + "args": { + "External id": 985316,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940019157.583, "dur": 117.839, + "args": { + "External id": 985317,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940019292.141, "dur": 44.105, + "args": { + "External id": 985318,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940019345.340, "dur": 37.792, + "args": { + "External id": 985319,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345940019420.892, "dur": 36.115, + "args": { + "External id": 985320,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345940019488.834, "dur": 22.793, + "args": { + "External id": 985321,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345940019537.784, "dur": 162.356, + "args": { + "External id": 985322,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940019599.052, "dur": 15.115, + "args": { + "External id": 985323,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940019606.191, "dur": 6.969, + "args": { + "External id": 985324,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940019618.614, "dur": 4.944, + "args": { + "External id": 985325,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940019625.138, "dur": 1.327, + "args": { + "External id": 985326,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940019629.593, "dur": 6.305, + "args": { + "External id": 985327,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940019714.314, "dur": 58.999, + "args": { + "External id": 985328,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345940019808.226, "dur": 37.430, + "args": { + "External id": 985329,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940019858.252, "dur": 51.324, + "args": { + "External id": 985330,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940019922.056, "dur": 43.394, + "args": { + "External id": 985331,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345940019990.024, "dur": 52.889, + "args": { + "External id": 985332,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940020092.106, "dur": 56.904, + "args": { + "External id": 985333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345940020174.302, "dur": 26.263, + "args": { + "External id": 985334,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18234 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.21)", "pid": 2338706, "tid": 2338706, + "ts": 6345940020376.562, "dur": 96.555, + "args": { + "External id": 985335,"Record function id": 0, "Ev Idx": 18235 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345940020564.362, "dur": 55.106, + "args": { + "External id": 985336,"Record function id": 0, "Ev Idx": 18236 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.22)", "pid": 2338706, "tid": 2338706, + "ts": 6345940020630.136, "dur": 31186.379, + "args": { + "External id": 985337,"Record function id": 0, "Ev Idx": 18237 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338706, "tid": 2338706, + "ts": 6345940020641.776, "dur": 1138.974, + "args": { + "External id": 985338,"Record function id": 0, "Ev Idx": 18238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940020741.718, "dur": 10.556, + "args": { + "External id": 985339,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345940020767.368, "dur": 38.760, + "args": { + "External id": 985340,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020774.064, "dur": 2.340, + "args": { + "External id": 985341,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020779.607, "dur": 0.542, + "args": { + "External id": 985342,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020782.140, "dur": 0.696, + "args": { + "External id": 985343,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020784.467, "dur": 0.580, + "args": { + "External id": 985344,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020787.275, "dur": 0.803, + "args": { + "External id": 985345,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020789.880, "dur": 0.533, + "args": { + "External id": 985346,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020792.380, "dur": 2.718, + "args": { + "External id": 985347,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020796.696, "dur": 0.515, + "args": { + "External id": 985348,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020798.956, "dur": 0.462, + "args": { + "External id": 985349,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940020819.422, "dur": 62.063, + "args": { + "External id": 985350,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345940020921.222, "dur": 201.766, + "args": { + "External id": 985351,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940020934.226, "dur": 4.536, + "args": { + "External id": 985352,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345940020944.592, "dur": 12.041, + "args": { + "External id": 985353,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940020949.702, "dur": 6.426, + "args": { + "External id": 985354,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020953.916, "dur": 0.770, + "args": { + "External id": 985355,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345940020964.199, "dur": 28.626, + "args": { + "External id": 985356,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020967.109, "dur": 0.489, + "args": { + "External id": 985357,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020969.336, "dur": 0.515, + "args": { + "External id": 985358,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020971.800, "dur": 0.392, + "args": { + "External id": 985359,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020973.864, "dur": 3.177, + "args": { + "External id": 985360,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020978.982, "dur": 0.524, + "args": { + "External id": 985361,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020981.216, "dur": 0.455, + "args": { + "External id": 985362,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020983.776, "dur": 0.486, + "args": { + "External id": 985363,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020985.528, "dur": 0.590, + "args": { + "External id": 985364,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940020988.057, "dur": 0.577, + "args": { + "External id": 985365,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940021005.397, "dur": 104.567, + "args": { + "External id": 985366,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345940021191.598, "dur": 482.074, + "args": { + "External id": 985367,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345940021220.354, "dur": 444.246, + "args": { + "External id": 985368,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18268, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345940021231.933, "dur": 426.491, + "args": { + "External id": 985369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345940021698.264, "dur": 2.794, + "args": { + "External id": 985370,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18270, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338706, "tid": 2338706, + "ts": 6345940021804.611, "dur": 29755.931, + "args": { + "External id": 985371,"Record function id": 0, "Ev Idx": 18271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940021923.168, "dur": 7.684, + "args": { + "External id": 985372,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940021939.263, "dur": 1.318, + "args": { + "External id": 985373,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940021942.710, "dur": 3.376, + "args": { + "External id": 985374,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940021950.191, "dur": 1.005, + "args": { + "External id": 985375,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940021952.739, "dur": 0.748, + "args": { + "External id": 985376,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940021954.975, "dur": 0.886, + "args": { + "External id": 985377,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940021957.660, "dur": 1.001, + "args": { + "External id": 985378,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940021962.531, "dur": 2.128, + "args": { + "External id": 985379,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940021966.158, "dur": 0.747, + "args": { + "External id": 985380,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940021968.332, "dur": 0.795, + "args": { + "External id": 985381,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940021989.387, "dur": 29517.804, + "args": { + "External id": 985382,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940022030.132, "dur": 29466.667, + "args": { + "External id": 985383,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940022048.952, "dur": 56.523, + "args": { + "External id": 985384,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345940022113.501, "dur": 29338.711, + "args": { + "External id": 985385,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940022116.544, "dur": 29333.965, + "args": { + "External id": 985386,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940022121.391, "dur": 7.692, + "args": { + "External id": 985387,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940022131.041, "dur": 29315.974, + "args": { + "External id": 985388,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940051739.749, "dur": 44.901, + "args": { + "External id": 985389,"Sequence number": 10552516, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18289 + } + }, + { + "ph": "s", "id": 403, "pid": 2338706, "tid": 2338706, "ts": 6345940051739.749, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345940051766.711, "dur": 12.585, + "args": { + "External id": 985390,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940051772.670, "dur": 6.348, + "args": { + "External id": 985391,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345940051868.264, "dur": 83.978, + "args": { + "External id": 985392,"Record function id": 0, "Ev Idx": 18292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345940051953.960, "dur": 1375.385, + "args": { + "External id": 985393,"Record function id": 0, "Ev Idx": 18293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940051997.248, "dur": 1315.133, + "args": { + "External id": 985394,"Sequence number": 10552517, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18294 + } + }, + { + "ph": "s", "id": 402, "pid": 2338706, "tid": 2338706, "ts": 6345940051997.248, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345940052138.096, "dur": 65.030, + "args": { + "External id": 985395,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940052221.019, "dur": 119.642, + "args": { + "External id": 985396,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940052357.519, "dur": 41.313, + "args": { + "External id": 985397,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940052407.348, "dur": 36.436, + "args": { + "External id": 985398,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345940052477.107, "dur": 31.670, + "args": { + "External id": 985399,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345940052532.218, "dur": 24.141, + "args": { + "External id": 985400,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345940052582.498, "dur": 163.804, + "args": { + "External id": 985401,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940052646.884, "dur": 15.512, + "args": { + "External id": 985402,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940052654.035, "dur": 7.272, + "args": { + "External id": 985403,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940052666.716, "dur": 4.774, + "args": { + "External id": 985404,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940052672.806, "dur": 1.200, + "args": { + "External id": 985405,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940052676.955, "dur": 6.676, + "args": { + "External id": 985406,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940052758.919, "dur": 53.578, + "args": { + "External id": 985407,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345940052846.353, "dur": 35.715, + "args": { + "External id": 985408,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940052894.373, "dur": 50.140, + "args": { + "External id": 985409,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940052956.845, "dur": 41.130, + "args": { + "External id": 985410,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345940053049.740, "dur": 76.510, + "args": { + "External id": 985411,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940053137.927, "dur": 48.260, + "args": { + "External id": 985412,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345940053209.140, "dur": 24.784, + "args": { + "External id": 985413,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18313 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.22)", "pid": 2338706, "tid": 2338706, + "ts": 6345940053404.205, "dur": 90.943, + "args": { + "External id": 985414,"Record function id": 0, "Ev Idx": 18314 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345940053583.983, "dur": 53.016, + "args": { + "External id": 985415,"Record function id": 0, "Ev Idx": 18315 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.23)", "pid": 2338706, "tid": 2338706, + "ts": 6345940053648.855, "dur": 32097.632, + "args": { + "External id": 985416,"Record function id": 0, "Ev Idx": 18316 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338706, "tid": 2338706, + "ts": 6345940053659.349, "dur": 1079.486, + "args": { + "External id": 985417,"Record function id": 0, "Ev Idx": 18317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940053756.269, "dur": 9.961, + "args": { + "External id": 985418,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345940053781.260, "dur": 38.567, + "args": { + "External id": 985419,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940053788.085, "dur": 2.529, + "args": { + "External id": 985420,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940053793.661, "dur": 0.514, + "args": { + "External id": 985421,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940053796.050, "dur": 0.539, + "args": { + "External id": 985422,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940053798.596, "dur": 0.682, + "args": { + "External id": 985423,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940053800.697, "dur": 0.680, + "args": { + "External id": 985424,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940053803.247, "dur": 0.696, + "args": { + "External id": 985425,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940053805.551, "dur": 3.297, + "args": { + "External id": 985426,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940053810.596, "dur": 0.503, + "args": { + "External id": 985427,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940053812.694, "dur": 0.533, + "args": { + "External id": 985428,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940053838.394, "dur": 61.365, + "args": { + "External id": 985429,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345940053938.256, "dur": 206.028, + "args": { + "External id": 985430,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940053951.195, "dur": 4.129, + "args": { + "External id": 985431,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345940053961.699, "dur": 12.794, + "args": { + "External id": 985432,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940053967.406, "dur": 6.575, + "args": { + "External id": 985433,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940053971.670, "dur": 0.880, + "args": { + "External id": 985434,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345940053982.143, "dur": 48.416, + "args": { + "External id": 985435,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940053984.465, "dur": 0.653, + "args": { + "External id": 985436,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940053987.049, "dur": 0.746, + "args": { + "External id": 985437,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940053989.596, "dur": 0.691, + "args": { + "External id": 985438,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940053992.196, "dur": 2.540, + "args": { + "External id": 985439,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940053996.555, "dur": 0.603, + "args": { + "External id": 985440,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940053998.850, "dur": 0.548, + "args": { + "External id": 985441,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940054000.983, "dur": 0.393, + "args": { + "External id": 985442,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940054003.422, "dur": 0.635, + "args": { + "External id": 985443,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940054005.523, "dur": 0.683, + "args": { + "External id": 985444,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940054048.652, "dur": 83.618, + "args": { + "External id": 985445,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345940054214.417, "dur": 420.637, + "args": { + "External id": 985446,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345940054246.826, "dur": 382.724, + "args": { + "External id": 985447,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18347, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345940054259.144, "dur": 364.012, + "args": { + "External id": 985448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345940054658.063, "dur": 2.443, + "args": { + "External id": 985449,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18349, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338706, "tid": 2338706, + "ts": 6345940054764.012, "dur": 30725.630, + "args": { + "External id": 985450,"Record function id": 0, "Ev Idx": 18350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940054881.798, "dur": 7.153, + "args": { + "External id": 985451,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940054892.780, "dur": 1.377, + "args": { + "External id": 985452,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940054896.050, "dur": 3.494, + "args": { + "External id": 985453,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940054901.672, "dur": 1.010, + "args": { + "External id": 985454,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940054904.567, "dur": 1.128, + "args": { + "External id": 985455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940054907.252, "dur": 0.835, + "args": { + "External id": 985456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940054910.032, "dur": 0.875, + "args": { + "External id": 985457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940054913.306, "dur": 2.345, + "args": { + "External id": 985458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940054917.173, "dur": 0.879, + "args": { + "External id": 985459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940054920.020, "dur": 0.853, + "args": { + "External id": 985460,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940054941.791, "dur": 30495.125, + "args": { + "External id": 985461,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940054959.702, "dur": 30467.164, + "args": { + "External id": 985462,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940054976.435, "dur": 19.423, + "args": { + "External id": 985463,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345940055001.980, "dur": 30381.184, + "args": { + "External id": 985464,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940055004.916, "dur": 30377.292, + "args": { + "External id": 985465,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940055031.055, "dur": 9.038, + "args": { + "External id": 985466,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940055042.349, "dur": 30334.719, + "args": { + "External id": 985467,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940085673.078, "dur": 40.364, + "args": { + "External id": 985468,"Sequence number": 10552518, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18368 + } + }, + { + "ph": "s", "id": 401, "pid": 2338706, "tid": 2338706, "ts": 6345940085673.078, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345940085695.260, "dur": 11.522, + "args": { + "External id": 985469,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940085700.547, "dur": 5.979, + "args": { + "External id": 985470,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345940085801.244, "dur": 83.840, + "args": { + "External id": 985471,"Record function id": 0, "Ev Idx": 18371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345940085886.530, "dur": 1392.547, + "args": { + "External id": 985472,"Record function id": 0, "Ev Idx": 18372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940085931.802, "dur": 1329.803, + "args": { + "External id": 985473,"Sequence number": 10552519, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18373 + } + }, + { + "ph": "s", "id": 400, "pid": 2338706, "tid": 2338706, "ts": 6345940085931.802, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345940086034.878, "dur": 95.491, + "args": { + "External id": 985474,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940086151.244, "dur": 119.652, + "args": { + "External id": 985475,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940086287.130, "dur": 44.022, + "args": { + "External id": 985476,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940086339.555, "dur": 37.316, + "args": { + "External id": 985477,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345940086413.008, "dur": 33.188, + "args": { + "External id": 985478,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345940086470.048, "dur": 22.273, + "args": { + "External id": 985479,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345940086518.386, "dur": 156.745, + "args": { + "External id": 985480,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940086574.405, "dur": 14.855, + "args": { + "External id": 985481,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940086581.621, "dur": 6.792, + "args": { + "External id": 985482,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940086593.667, "dur": 5.303, + "args": { + "External id": 985483,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940086600.414, "dur": 1.122, + "args": { + "External id": 985484,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940086604.413, "dur": 6.819, + "args": { + "External id": 985485,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940086688.669, "dur": 59.418, + "args": { + "External id": 985486,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345940086785.564, "dur": 38.114, + "args": { + "External id": 985487,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940086836.004, "dur": 52.080, + "args": { + "External id": 985488,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940086901.304, "dur": 44.409, + "args": { + "External id": 985489,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345940086970.802, "dur": 34.976, + "args": { + "External id": 985490,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940087033.506, "dur": 91.545, + "args": { + "External id": 985491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345940087152.874, "dur": 27.408, + "args": { + "External id": 985492,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18392 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.23)", "pid": 2338706, "tid": 2338706, + "ts": 6345940087357.666, "dur": 91.125, + "args": { + "External id": 985493,"Record function id": 0, "Ev Idx": 18393 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345940087533.186, "dur": 53.393, + "args": { + "External id": 985494,"Record function id": 0, "Ev Idx": 18394 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.24)", "pid": 2338706, "tid": 2338706, + "ts": 6345940087598.259, "dur": 31231.882, + "args": { + "External id": 985495,"Record function id": 0, "Ev Idx": 18395 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338706, "tid": 2338706, + "ts": 6345940087609.113, "dur": 1066.460, + "args": { + "External id": 985496,"Record function id": 0, "Ev Idx": 18396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940087706.614, "dur": 10.911, + "args": { + "External id": 985497,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345940087734.685, "dur": 38.980, + "args": { + "External id": 985498,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087741.247, "dur": 2.518, + "args": { + "External id": 985499,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087746.931, "dur": 0.683, + "args": { + "External id": 985500,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087749.487, "dur": 0.450, + "args": { + "External id": 985501,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087751.740, "dur": 0.629, + "args": { + "External id": 985502,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087753.921, "dur": 0.742, + "args": { + "External id": 985503,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087756.359, "dur": 0.476, + "args": { + "External id": 985504,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087758.731, "dur": 3.178, + "args": { + "External id": 985505,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087764.169, "dur": 0.601, + "args": { + "External id": 985506,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087766.369, "dur": 0.650, + "args": { + "External id": 985507,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940087786.853, "dur": 65.362, + "args": { + "External id": 985508,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345940087889.930, "dur": 159.574, + "args": { + "External id": 985509,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940087903.567, "dur": 4.030, + "args": { + "External id": 985510,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345940087913.413, "dur": 12.345, + "args": { + "External id": 985511,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940087918.912, "dur": 6.352, + "args": { + "External id": 985512,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087923.202, "dur": 0.732, + "args": { + "External id": 985513,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345940087933.088, "dur": 28.239, + "args": { + "External id": 985514,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087935.460, "dur": 0.596, + "args": { + "External id": 985515,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087938.005, "dur": 0.641, + "args": { + "External id": 985516,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087940.536, "dur": 0.677, + "args": { + "External id": 985517,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087943.359, "dur": 2.730, + "args": { + "External id": 985518,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087947.581, "dur": 0.379, + "args": { + "External id": 985519,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087950.049, "dur": 0.471, + "args": { + "External id": 985520,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087952.079, "dur": 0.555, + "args": { + "External id": 985521,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087954.473, "dur": 0.424, + "args": { + "External id": 985522,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940087956.351, "dur": 0.450, + "args": { + "External id": 985523,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940087973.958, "dur": 63.636, + "args": { + "External id": 985524,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345940088154.565, "dur": 417.074, + "args": { + "External id": 985525,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345940088183.897, "dur": 382.169, + "args": { + "External id": 985526,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18426, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345940088195.871, "dur": 364.424, + "args": { + "External id": 985527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345940088594.895, "dur": 2.634, + "args": { + "External id": 985528,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18428, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338706, "tid": 2338706, + "ts": 6345940088698.016, "dur": 29873.128, + "args": { + "External id": 985529,"Record function id": 0, "Ev Idx": 18429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940088814.821, "dur": 7.575, + "args": { + "External id": 985530,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940088826.154, "dur": 1.257, + "args": { + "External id": 985531,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940088829.558, "dur": 3.333, + "args": { + "External id": 985532,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940088834.883, "dur": 0.813, + "args": { + "External id": 985533,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940088837.469, "dur": 0.934, + "args": { + "External id": 985534,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940088839.816, "dur": 0.800, + "args": { + "External id": 985535,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940088842.476, "dur": 0.730, + "args": { + "External id": 985536,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940088845.349, "dur": 2.475, + "args": { + "External id": 985537,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940088849.320, "dur": 1.087, + "args": { + "External id": 985538,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940088852.033, "dur": 0.657, + "args": { + "External id": 985539,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940088876.245, "dur": 29642.008, + "args": { + "External id": 985540,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940088895.635, "dur": 29612.304, + "args": { + "External id": 985541,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940088911.400, "dur": 20.407, + "args": { + "External id": 985542,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345940088939.428, "dur": 29524.704, + "args": { + "External id": 985543,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940088942.337, "dur": 29519.986, + "args": { + "External id": 985544,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940088946.883, "dur": 6.587, + "args": { + "External id": 985545,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940088955.269, "dur": 29503.411, + "args": { + "External id": 985546,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940118754.374, "dur": 41.121, + "args": { + "External id": 985547,"Sequence number": 10552520, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18447 + } + }, + { + "ph": "s", "id": 399, "pid": 2338706, "tid": 2338706, "ts": 6345940118754.374, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345940118777.255, "dur": 11.917, + "args": { + "External id": 985548,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940118782.589, "dur": 6.255, + "args": { + "External id": 985549,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345940118879.652, "dur": 86.605, + "args": { + "External id": 985550,"Record function id": 0, "Ev Idx": 18450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345940118967.721, "dur": 1413.721, + "args": { + "External id": 985551,"Record function id": 0, "Ev Idx": 18451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940119041.387, "dur": 1322.916, + "args": { + "External id": 985552,"Sequence number": 10552521, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18452 + } + }, + { + "ph": "s", "id": 398, "pid": 2338706, "tid": 2338706, "ts": 6345940119041.387, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345940119169.666, "dur": 63.241, + "args": { + "External id": 985553,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940119251.716, "dur": 120.927, + "args": { + "External id": 985554,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940119388.957, "dur": 43.436, + "args": { + "External id": 985555,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940119441.154, "dur": 37.402, + "args": { + "External id": 985556,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345940119513.578, "dur": 33.483, + "args": { + "External id": 985557,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345940119571.045, "dur": 22.398, + "args": { + "External id": 985558,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345940119620.274, "dur": 158.621, + "args": { + "External id": 985559,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940119680.992, "dur": 15.455, + "args": { + "External id": 985560,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940119688.037, "dur": 7.558, + "args": { + "External id": 985561,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940119700.656, "dur": 4.624, + "args": { + "External id": 985562,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940119706.710, "dur": 1.199, + "args": { + "External id": 985563,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940119711.033, "dur": 6.717, + "args": { + "External id": 985564,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940119794.517, "dur": 58.901, + "args": { + "External id": 985565,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345940119893.647, "dur": 36.899, + "args": { + "External id": 985566,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940119942.650, "dur": 52.535, + "args": { + "External id": 985567,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940120025.337, "dur": 86.414, + "args": { + "External id": 985568,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345940120148.463, "dur": 37.969, + "args": { + "External id": 985569,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940120196.204, "dur": 47.606, + "args": { + "External id": 985570,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345940120264.833, "dur": 21.740, + "args": { + "External id": 985571,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18471 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.24)", "pid": 2338706, "tid": 2338706, + "ts": 6345940120456.658, "dur": 95.351, + "args": { + "External id": 985572,"Record function id": 0, "Ev Idx": 18472 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338706, "tid": 2338706, + "ts": 6345940120637.694, "dur": 55.348, + "args": { + "External id": 985573,"Record function id": 0, "Ev Idx": 18473 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.25)", "pid": 2338706, "tid": 2338706, + "ts": 6345940120704.575, "dur": 30659.933, + "args": { + "External id": 985574,"Record function id": 0, "Ev Idx": 18474 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2338706, "tid": 2338706, + "ts": 6345940120714.212, "dur": 1146.950, + "args": { + "External id": 985575,"Record function id": 0, "Ev Idx": 18475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940120814.163, "dur": 10.382, + "args": { + "External id": 985576,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345940120841.756, "dur": 39.027, + "args": { + "External id": 985577,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940120848.708, "dur": 2.478, + "args": { + "External id": 985578,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940120854.315, "dur": 0.538, + "args": { + "External id": 985579,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940120856.732, "dur": 0.709, + "args": { + "External id": 985580,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940120859.240, "dur": 0.545, + "args": { + "External id": 985581,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940120861.562, "dur": 0.620, + "args": { + "External id": 985582,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940120864.163, "dur": 0.515, + "args": { + "External id": 985583,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940120866.385, "dur": 2.817, + "args": { + "External id": 985584,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940120871.545, "dur": 0.623, + "args": { + "External id": 985585,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940120873.757, "dur": 0.507, + "args": { + "External id": 985586,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940120894.037, "dur": 59.524, + "args": { + "External id": 985587,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338706, "tid": 2338706, + "ts": 6345940120992.883, "dur": 257.626, + "args": { + "External id": 985588,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940121026.541, "dur": 7.646, + "args": { + "External id": 985589,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338706, "tid": 2338706, + "ts": 6345940121042.355, "dur": 57.774, + "args": { + "External id": 985590,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940121047.912, "dur": 51.649, + "args": { + "External id": 985591,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "27264000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940121094.468, "dur": 1.440, + "args": { + "External id": 985592,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338706, "tid": 2338706, + "ts": 6345940121110.278, "dur": 59.802, + "args": { + "External id": 985593,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940121113.704, "dur": 0.721, + "args": { + "External id": 985594,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940121116.049, "dur": 0.483, + "args": { + "External id": 985595,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940121118.000, "dur": 0.296, + "args": { + "External id": 985596,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940121120.115, "dur": 3.187, + "args": { + "External id": 985597,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940121125.165, "dur": 0.466, + "args": { + "External id": 985598,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940121127.292, "dur": 0.515, + "args": { + "External id": 985599,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940121137.434, "dur": 3.885, + "args": { + "External id": 985600,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940121162.346, "dur": 0.691, + "args": { + "External id": 985601,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940121164.730, "dur": 0.563, + "args": { + "External id": 985602,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940121186.208, "dur": 53.308, + "args": { + "External id": 985603,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338706, "tid": 2338706, + "ts": 6345940121319.692, "dur": 434.029, + "args": { + "External id": 985604,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345940121353.497, "dur": 394.768, + "args": { + "External id": 985605,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18505, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338706, "tid": 2338706, + "ts": 6345940121367.618, "dur": 374.265, + "args": { + "External id": 985606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345940121776.081, "dur": 2.829, + "args": { + "External id": 985607,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18507, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2338706, "tid": 2338706, + "ts": 6345940121884.917, "dur": 29204.071, + "args": { + "External id": 985608,"Record function id": 0, "Ev Idx": 18508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940122004.659, "dur": 28.528, + "args": { + "External id": 985609,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940122040.187, "dur": 1.225, + "args": { + "External id": 985610,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940122043.372, "dur": 3.915, + "args": { + "External id": 985611,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940122049.181, "dur": 1.142, + "args": { + "External id": 985612,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940122088.105, "dur": 3.193, + "args": { + "External id": 985613,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940122095.581, "dur": 1.178, + "args": { + "External id": 985614,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940122098.800, "dur": 0.996, + "args": { + "External id": 985615,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940122102.246, "dur": 2.641, + "args": { + "External id": 985616,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940122106.837, "dur": 1.033, + "args": { + "External id": 985617,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940122109.995, "dur": 1.029, + "args": { + "External id": 985618,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940122134.013, "dur": 28857.909, + "args": { + "External id": 985619,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940122153.441, "dur": 28828.422, + "args": { + "External id": 985620,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940122170.698, "dur": 20.107, + "args": { + "External id": 985621,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345940122197.665, "dur": 28741.613, + "args": { + "External id": 985622,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940122201.129, "dur": 28736.409, + "args": { + "External id": 985623,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940122205.544, "dur": 7.214, + "args": { + "External id": 985624,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940122214.581, "dur": 28719.529, + "args": { + "External id": 985625,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940151286.200, "dur": 42.799, + "args": { + "External id": 985626,"Sequence number": 10552522, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18526 + } + }, + { + "ph": "s", "id": 397, "pid": 2338706, "tid": 2338706, "ts": 6345940151286.200, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345940151309.938, "dur": 12.170, + "args": { + "External id": 985627,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940151315.431, "dur": 6.403, + "args": { + "External id": 985628,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345940151416.941, "dur": 88.609, + "args": { + "External id": 985629,"Record function id": 0, "Ev Idx": 18529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338706, "tid": 2338706, + "ts": 6345940151506.938, "dur": 1335.677, + "args": { + "External id": 985630,"Record function id": 0, "Ev Idx": 18530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940151553.128, "dur": 1272.993, + "args": { + "External id": 985631,"Sequence number": 10552523, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18531 + } + }, + { + "ph": "s", "id": 396, "pid": 2338706, "tid": 2338706, "ts": 6345940151553.128, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345940151637.381, "dur": 59.814, + "args": { + "External id": 985632,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940151711.820, "dur": 120.072, + "args": { + "External id": 985633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940151846.815, "dur": 44.149, + "args": { + "External id": 985634,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940151899.542, "dur": 37.267, + "args": { + "External id": 985635,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338706, "tid": 2338706, + "ts": 6345940151967.097, "dur": 32.479, + "args": { + "External id": 985636,"kernel_hash": "cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jm/cjmcqz64yvhehczbluucptutsrk7tnydhfpcpgubhbobfvggfk4t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338706, "tid": 2338706, + "ts": 6345940152050.343, "dur": 70.399, + "args": { + "External id": 985637,"kernel_hash": "c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/73/c73gzh6nuvbz6lmwuwolvfztyyldp6eupvut57avgqbs3cxeovs2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345940152149.539, "dur": 160.792, + "args": { + "External id": 985638,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940152208.535, "dur": 17.047, + "args": { + "External id": 985639,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940152216.156, "dur": 8.509, + "args": { + "External id": 985640,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940152230.176, "dur": 4.527, + "args": { + "External id": 985641,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940152236.212, "dur": 1.471, + "args": { + "External id": 985642,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940152240.442, "dur": 6.193, + "args": { + "External id": 985643,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940152323.898, "dur": 69.234, + "args": { + "External id": 985644,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338706, "tid": 2338706, + "ts": 6345940152429.890, "dur": 39.925, + "args": { + "External id": 985645,"kernel_hash": "cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/vu/cvumbknuhbwm7xbr365sswb3nb5beykodkjazo5efrnsnox6hxls.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940152482.297, "dur": 53.321, + "args": { + "External id": 985646,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940152553.142, "dur": 44.007, + "args": { + "External id": 985647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338706, "tid": 2338706, + "ts": 6345940152622.813, "dur": 33.223, + "args": { + "External id": 985648,"kernel_hash": "cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/fg/cfgxszr4z6danmlluxsg5ihv5lmhtc5ofjjkq45okcrthgtnm6sd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940152664.135, "dur": 42.863, + "args": { + "External id": 985649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338706, "tid": 2338706, + "ts": 6345940152726.403, "dur": 22.190, + "args": { + "External id": 985650,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18550 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.25)", "pid": 2338706, "tid": 2338706, + "ts": 6345940152915.887, "dur": 39.231, + "args": { + "External id": 985651,"Record function id": 0, "Ev Idx": 18551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940153184.297, "dur": 342.591, + "args": { + "External id": 985652,"Sequence number": 10552524, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18552 + } + }, + { + "ph": "s", "id": 395, "pid": 2338706, "tid": 2338706, "ts": 6345940153184.297, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940153226.541, "dur": 9.792, + "args": { + "External id": 985653,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940153228.784, "dur": 7.029, + "args": { + "External id": 985654,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940153247.975, "dur": 14.978, + "args": { + "External id": 985655,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940153252.444, "dur": 9.710, + "args": { + "External id": 985656,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940153271.851, "dur": 5.465, + "args": { + "External id": 985657,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940153504.565, "dur": 5.151, + "args": { + "External id": 985658,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940153506.389, "dur": 3.011, + "args": { + "External id": 985659,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940153561.808, "dur": 153.740, + "args": { + "External id": 985660,"Sequence number": 10552525, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940153564.575, "dur": 14.457, + "args": { + "External id": 985661,"Sequence number": 10552525, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18561 + } + }, + { + "ph": "s", "id": 394, "pid": 2338706, "tid": 2338706, "ts": 6345940153564.575, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940153569.967, "dur": 6.944, + "args": { + "External id": 985662,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940153574.101, "dur": 2.360, + "args": { + "External id": 985663,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940153581.276, "dur": 133.825, + "args": { + "External id": 985664,"Sequence number": 10552526, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940153584.514, "dur": 6.393, + "args": { + "External id": 985665,"Sequence number": 10552526, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940153586.466, "dur": 4.228, + "args": { + "External id": 985666,"Sequence number": 10552526, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18566 + } + }, + { + "ph": "s", "id": 393, "pid": 2338706, "tid": 2338706, "ts": 6345940153586.466, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940153593.332, "dur": 107.998, + "args": { + "External id": 985667,"Sequence number": 10552527, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18567 + } + }, + { + "ph": "s", "id": 392, "pid": 2338706, "tid": 2338706, "ts": 6345940153593.332, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940153705.648, "dur": 8.270, + "args": { + "External id": 985668,"Sequence number": 10552528, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18568 + } + }, + { + "ph": "s", "id": 391, "pid": 2338706, "tid": 2338706, "ts": 6345940153705.648, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940153727.948, "dur": 74.105, + "args": { + "External id": 985669,"Sequence number": 10552529, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940153729.027, "dur": 7.707, + "args": { + "External id": 985670,"Sequence number": 10552529, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18570 + } + }, + { + "ph": "s", "id": 390, "pid": 2338706, "tid": 2338706, "ts": 6345940153729.027, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940153731.788, "dur": 3.681, + "args": { + "External id": 985671,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940153733.925, "dur": 1.329, + "args": { + "External id": 985672,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940153737.348, "dur": 64.471, + "args": { + "External id": 985673,"Sequence number": 10552530, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940153738.743, "dur": 5.527, + "args": { + "External id": 985674,"Sequence number": 10552530, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940153740.033, "dur": 4.072, + "args": { + "External id": 985675,"Sequence number": 10552530, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18575 + } + }, + { + "ph": "s", "id": 389, "pid": 2338706, "tid": 2338706, "ts": 6345940153740.033, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940153744.917, "dur": 50.491, + "args": { + "External id": 985676,"Sequence number": 10552531, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18576 + } + }, + { + "ph": "s", "id": 388, "pid": 2338706, "tid": 2338706, "ts": 6345940153744.917, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940153797.639, "dur": 3.718, + "args": { + "External id": 985677,"Sequence number": 10552532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18577 + } + }, + { + "ph": "s", "id": 387, "pid": 2338706, "tid": 2338706, "ts": 6345940153797.639, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940153811.315, "dur": 70.275, + "args": { + "External id": 985678,"Sequence number": 10552533, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940153812.211, "dur": 7.699, + "args": { + "External id": 985679,"Sequence number": 10552533, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18579 + } + }, + { + "ph": "s", "id": 386, "pid": 2338706, "tid": 2338706, "ts": 6345940153812.211, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940153815.021, "dur": 3.639, + "args": { + "External id": 985680,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940153817.636, "dur": 0.869, + "args": { + "External id": 985681,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940153820.885, "dur": 60.462, + "args": { + "External id": 985682,"Sequence number": 10552534, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940153822.337, "dur": 6.288, + "args": { + "External id": 985683,"Sequence number": 10552534, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940153823.641, "dur": 4.820, + "args": { + "External id": 985684,"Sequence number": 10552534, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18584 + } + }, + { + "ph": "s", "id": 385, "pid": 2338706, "tid": 2338706, "ts": 6345940153823.641, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940153829.348, "dur": 44.373, + "args": { + "External id": 985685,"Sequence number": 10552535, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18585 + } + }, + { + "ph": "s", "id": 384, "pid": 2338706, "tid": 2338706, "ts": 6345940153829.348, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940153875.941, "dur": 4.999, + "args": { + "External id": 985686,"Sequence number": 10552536, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18586 + } + }, + { + "ph": "s", "id": 383, "pid": 2338706, "tid": 2338706, "ts": 6345940153875.941, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940153905.411, "dur": 4.761, + "args": { + "External id": 985687,"Sequence number": 10552537, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940153906.460, "dur": 3.565, + "args": { + "External id": 985688,"Sequence number": 10552537, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18588 + } + }, + { + "ph": "s", "id": 382, "pid": 2338706, "tid": 2338706, "ts": 6345940153906.460, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940153919.109, "dur": 7.058, + "args": { + "External id": 985689,"Sequence number": 10552538, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940153920.317, "dur": 5.695, + "args": { + "External id": 985690,"Sequence number": 10552538, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18590 + } + }, + { + "ph": "s", "id": 381, "pid": 2338706, "tid": 2338706, "ts": 6345940153920.317, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940153931.287, "dur": 4.051, + "args": { + "External id": 985691,"Sequence number": 10552539, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940153932.501, "dur": 2.665, + "args": { + "External id": 985692,"Sequence number": 10552539, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18592 + } + }, + { + "ph": "s", "id": 380, "pid": 2338706, "tid": 2338706, "ts": 6345940153932.501, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940153976.891, "dur": 281.783, + "args": { + "External id": 985693,"Sequence number": 10552540, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18593 + } + }, + { + "ph": "s", "id": 379, "pid": 2338706, "tid": 2338706, "ts": 6345940153976.891, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940154002.323, "dur": 29.927, + "args": { + "External id": 985694,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940154005.235, "dur": 24.882, + "args": { + "External id": 985695,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940154279.609, "dur": 148.456, + "args": { + "External id": 985696,"Sequence number": 10552541, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18596 + } + }, + { + "ph": "s", "id": 378, "pid": 2338706, "tid": 2338706, "ts": 6345940154279.609, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940154301.185, "dur": 10.520, + "args": { + "External id": 985697,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940154304.436, "dur": 6.710, + "args": { + "External id": 985698,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338706, "tid": 2338706, + "ts": 6345940154465.879, "dur": 211.374, + "args": { + "External id": 985699,"Sequence number": 10552542, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18599 + } + }, + { + "ph": "s", "id": 377, "pid": 2338706, "tid": 2338706, "ts": 6345940154465.879, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345940154500.535, "dur": 141.741, + "args": { + "External id": 985700,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940154556.183, "dur": 9.494, + "args": { + "External id": 985701,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940154559.709, "dur": 5.409, + "args": { + "External id": 985702,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940154569.005, "dur": 5.038, + "args": { + "External id": 985703,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940154575.468, "dur": 1.144, + "args": { + "External id": 985704,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940154579.655, "dur": 4.145, + "args": { + "External id": 985705,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2338706, + "ts": 6345940154658.915, "dur": 6.250, + "args": { + "External id": 985706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940154684.296, "dur": 8.370, + "args": { + "External id": 985707,"Sequence number": 10552543, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940154686.774, "dur": 5.668, + "args": { + "External id": 985708,"Sequence number": 10552543, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18608 + } + }, + { + "ph": "s", "id": 376, "pid": 2338706, "tid": 2338706, "ts": 6345940154686.774, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940154709.428, "dur": 133.439, + "args": { + "External id": 985709,"Sequence number": 10552544, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940154711.041, "dur": 11.636, + "args": { + "External id": 985710,"Sequence number": 10552544, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18610 + } + }, + { + "ph": "s", "id": 375, "pid": 2338706, "tid": 2338706, "ts": 6345940154711.041, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940154714.837, "dur": 6.320, + "args": { + "External id": 985711,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940154718.404, "dur": 2.427, + "args": { + "External id": 985712,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940154724.110, "dur": 118.396, + "args": { + "External id": 985713,"Sequence number": 10552545, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940154726.700, "dur": 4.400, + "args": { + "External id": 985714,"Sequence number": 10552545, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940154727.648, "dur": 3.292, + "args": { + "External id": 985715,"Sequence number": 10552545, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18615 + } + }, + { + "ph": "s", "id": 374, "pid": 2338706, "tid": 2338706, "ts": 6345940154727.648, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940154732.218, "dur": 102.236, + "args": { + "External id": 985716,"Sequence number": 10552546, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18616 + } + }, + { + "ph": "s", "id": 373, "pid": 2338706, "tid": 2338706, "ts": 6345940154732.218, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940154837.361, "dur": 4.202, + "args": { + "External id": 985717,"Sequence number": 10552547, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18617 + } + }, + { + "ph": "s", "id": 372, "pid": 2338706, "tid": 2338706, "ts": 6345940154837.361, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940154890.217, "dur": 348.175, + "args": { + "External id": 985718,"Sequence number": 10552548, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18618 + } + }, + { + "ph": "s", "id": 371, "pid": 2338706, "tid": 2338706, "ts": 6345940154890.217, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940154920.886, "dur": 3.342, + "args": { + "External id": 985719,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940154921.963, "dur": 2.034, + "args": { + "External id": 985720,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338706, "tid": 2338706, + "ts": 6345940154934.025, "dur": 8.950, + "args": { + "External id": 985721,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940154935.211, "dur": 7.617, + "args": { + "External id": 985722,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940154938.878, "dur": 3.754, + "args": { + "External id": 985723,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940154952.258, "dur": 7.723, + "args": { + "External id": 985724,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940154954.631, "dur": 4.994, + "args": { + "External id": 985725,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940154967.378, "dur": 3.471, + "args": { + "External id": 985726,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940154975.357, "dur": 3.536, + "args": { + "External id": 985727,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940155208.939, "dur": 5.560, + "args": { + "External id": 985728,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940155210.571, "dur": 3.512, + "args": { + "External id": 985729,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940155220.596, "dur": 2.708, + "args": { + "External id": 985730,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940155221.980, "dur": 1.229, + "args": { + "External id": 985731,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940155264.913, "dur": 129.779, + "args": { + "External id": 985732,"Sequence number": 10552549, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940155266.498, "dur": 11.048, + "args": { + "External id": 985733,"Sequence number": 10552549, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18633 + } + }, + { + "ph": "s", "id": 370, "pid": 2338706, "tid": 2338706, "ts": 6345940155266.498, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940155270.317, "dur": 5.695, + "args": { + "External id": 985734,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940155273.579, "dur": 2.006, + "args": { + "External id": 985735,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940155278.962, "dur": 115.392, + "args": { + "External id": 985736,"Sequence number": 10552550, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940155283.432, "dur": 5.327, + "args": { + "External id": 985737,"Sequence number": 10552550, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940155284.729, "dur": 3.830, + "args": { + "External id": 985738,"Sequence number": 10552550, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18638 + } + }, + { + "ph": "s", "id": 369, "pid": 2338706, "tid": 2338706, "ts": 6345940155284.729, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940155289.672, "dur": 93.775, + "args": { + "External id": 985739,"Sequence number": 10552551, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18639 + } + }, + { + "ph": "s", "id": 368, "pid": 2338706, "tid": 2338706, "ts": 6345940155289.672, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940155386.930, "dur": 6.564, + "args": { + "External id": 985740,"Sequence number": 10552552, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18640 + } + }, + { + "ph": "s", "id": 367, "pid": 2338706, "tid": 2338706, "ts": 6345940155386.930, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940155405.582, "dur": 76.091, + "args": { + "External id": 985741,"Sequence number": 10552553, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940155406.567, "dur": 7.132, + "args": { + "External id": 985742,"Sequence number": 10552553, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18642 + } + }, + { + "ph": "s", "id": 366, "pid": 2338706, "tid": 2338706, "ts": 6345940155406.567, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940155408.726, "dur": 3.496, + "args": { + "External id": 985743,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940155410.396, "dur": 1.597, + "args": { + "External id": 985744,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940155414.341, "dur": 67.115, + "args": { + "External id": 985745,"Sequence number": 10552554, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940155415.519, "dur": 6.020, + "args": { + "External id": 985746,"Sequence number": 10552554, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940155416.491, "dur": 4.821, + "args": { + "External id": 985747,"Sequence number": 10552554, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18647 + } + }, + { + "ph": "s", "id": 365, "pid": 2338706, "tid": 2338706, "ts": 6345940155416.491, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940155422.169, "dur": 54.031, + "args": { + "External id": 985748,"Sequence number": 10552555, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18648 + } + }, + { + "ph": "s", "id": 364, "pid": 2338706, "tid": 2338706, "ts": 6345940155422.169, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940155478.402, "dur": 2.532, + "args": { + "External id": 985749,"Sequence number": 10552556, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18649 + } + }, + { + "ph": "s", "id": 363, "pid": 2338706, "tid": 2338706, "ts": 6345940155478.402, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940155509.004, "dur": 178.070, + "args": { + "External id": 985750,"Sequence number": 10552557, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18650 + } + }, + { + "ph": "s", "id": 362, "pid": 2338706, "tid": 2338706, "ts": 6345940155509.004, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940155557.648, "dur": 5.508, + "args": { + "External id": 985751,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940155601.451, "dur": 70.866, + "args": { + "External id": 985752,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940155602.588, "dur": 5.721, + "args": { + "External id": 985753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940155604.120, "dur": 3.296, + "args": { + "External id": 985754,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940155606.346, "dur": 0.790, + "args": { + "External id": 985755,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940155609.266, "dur": 62.717, + "args": { + "External id": 985756,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940155613.116, "dur": 3.055, + "args": { + "External id": 985757,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940155614.465, "dur": 1.584, + "args": { + "External id": 985758,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940155616.983, "dur": 50.497, + "args": { + "External id": 985759,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940155669.866, "dur": 1.337, + "args": { + "External id": 985760,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2338706, + "ts": 6345940155699.143, "dur": 30.335, + "args": { + "External id": 985761,"Sequence number": 10552558, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18661 + } + }, + { + "ph": "s", "id": 361, "pid": 2338706, "tid": 2338706, "ts": 6345940155699.143, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940155771.811, "dur": 206.558, + "args": { + "External id": 985762,"Sequence number": 10552559, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18662 + } + }, + { + "ph": "s", "id": 360, "pid": 2338706, "tid": 2338706, "ts": 6345940155771.811, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940155792.744, "dur": 3.395, + "args": { + "External id": 985763,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940155793.948, "dur": 2.003, + "args": { + "External id": 985764,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940155805.265, "dur": 8.665, + "args": { + "External id": 985765,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940155809.025, "dur": 4.456, + "args": { + "External id": 985766,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940155820.851, "dur": 4.274, + "args": { + "External id": 985767,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940155962.142, "dur": 3.755, + "args": { + "External id": 985768,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940155963.783, "dur": 1.877, + "args": { + "External id": 985769,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940156001.211, "dur": 180.857, + "args": { + "External id": 985770,"Sequence number": 10552560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940156002.192, "dur": 31.214, + "args": { + "External id": 985771,"Sequence number": 10552560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18671 + } + }, + { + "ph": "s", "id": 359, "pid": 2338706, "tid": 2338706, "ts": 6345940156002.192, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940156005.177, "dur": 25.899, + "args": { + "External id": 985772,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940156027.988, "dur": 2.492, + "args": { + "External id": 985773,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940156035.078, "dur": 146.674, + "args": { + "External id": 985774,"Sequence number": 10552561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940156037.361, "dur": 8.984, + "args": { + "External id": 985775,"Sequence number": 10552561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940156038.601, "dur": 7.556, + "args": { + "External id": 985776,"Sequence number": 10552561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18676 + } + }, + { + "ph": "s", "id": 358, "pid": 2338706, "tid": 2338706, "ts": 6345940156038.601, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940156047.128, "dur": 122.048, + "args": { + "External id": 985777,"Sequence number": 10552562, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18677 + } + }, + { + "ph": "s", "id": 357, "pid": 2338706, "tid": 2338706, "ts": 6345940156047.128, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940156173.842, "dur": 6.956, + "args": { + "External id": 985778,"Sequence number": 10552563, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18678 + } + }, + { + "ph": "s", "id": 356, "pid": 2338706, "tid": 2338706, "ts": 6345940156173.842, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940156195.155, "dur": 81.294, + "args": { + "External id": 985779,"Sequence number": 10552564, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940156196.253, "dur": 11.278, + "args": { + "External id": 985780,"Sequence number": 10552564, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18680 + } + }, + { + "ph": "s", "id": 355, "pid": 2338706, "tid": 2338706, "ts": 6345940156196.253, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940156202.443, "dur": 3.661, + "args": { + "External id": 985781,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940156204.789, "dur": 1.136, + "args": { + "External id": 985782,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940156208.388, "dur": 67.766, + "args": { + "External id": 985783,"Sequence number": 10552565, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940156210.091, "dur": 7.314, + "args": { + "External id": 985784,"Sequence number": 10552565, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940156213.802, "dur": 3.424, + "args": { + "External id": 985785,"Sequence number": 10552565, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18685 + } + }, + { + "ph": "s", "id": 354, "pid": 2338706, "tid": 2338706, "ts": 6345940156213.802, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940156218.351, "dur": 50.689, + "args": { + "External id": 985786,"Sequence number": 10552566, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18686 + } + }, + { + "ph": "s", "id": 353, "pid": 2338706, "tid": 2338706, "ts": 6345940156218.351, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940156270.958, "dur": 4.232, + "args": { + "External id": 985787,"Sequence number": 10552567, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18687 + } + }, + { + "ph": "s", "id": 352, "pid": 2338706, "tid": 2338706, "ts": 6345940156270.958, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940156285.595, "dur": 65.424, + "args": { + "External id": 985788,"Sequence number": 10552568, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940156286.408, "dur": 6.713, + "args": { + "External id": 985789,"Sequence number": 10552568, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18689 + } + }, + { + "ph": "s", "id": 351, "pid": 2338706, "tid": 2338706, "ts": 6345940156286.408, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940156288.806, "dur": 2.985, + "args": { + "External id": 985790,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940156290.480, "dur": 1.169, + "args": { + "External id": 985791,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940156293.833, "dur": 56.974, + "args": { + "External id": 985792,"Sequence number": 10552569, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940156295.132, "dur": 8.201, + "args": { + "External id": 985793,"Sequence number": 10552569, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940156296.075, "dur": 7.069, + "args": { + "External id": 985794,"Sequence number": 10552569, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18694 + } + }, + { + "ph": "s", "id": 350, "pid": 2338706, "tid": 2338706, "ts": 6345940156296.075, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940156303.887, "dur": 42.212, + "args": { + "External id": 985795,"Sequence number": 10552570, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18695 + } + }, + { + "ph": "s", "id": 349, "pid": 2338706, "tid": 2338706, "ts": 6345940156303.887, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940156347.882, "dur": 2.414, + "args": { + "External id": 985796,"Sequence number": 10552571, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18696 + } + }, + { + "ph": "s", "id": 348, "pid": 2338706, "tid": 2338706, "ts": 6345940156347.882, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940156371.636, "dur": 4.920, + "args": { + "External id": 985797,"Sequence number": 10552572, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940156372.976, "dur": 3.386, + "args": { + "External id": 985798,"Sequence number": 10552572, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18698 + } + }, + { + "ph": "s", "id": 347, "pid": 2338706, "tid": 2338706, "ts": 6345940156372.976, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940156384.921, "dur": 4.165, + "args": { + "External id": 985799,"Sequence number": 10552573, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940156386.300, "dur": 2.623, + "args": { + "External id": 985800,"Sequence number": 10552573, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18700 + } + }, + { + "ph": "s", "id": 346, "pid": 2338706, "tid": 2338706, "ts": 6345940156386.300, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940156394.034, "dur": 4.582, + "args": { + "External id": 985801,"Sequence number": 10552574, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940156395.169, "dur": 3.286, + "args": { + "External id": 985802,"Sequence number": 10552574, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18702 + } + }, + { + "ph": "s", "id": 345, "pid": 2338706, "tid": 2338706, "ts": 6345940156395.169, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940156433.061, "dur": 181.655, + "args": { + "External id": 985803,"Sequence number": 10552575, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18703 + } + }, + { + "ph": "s", "id": 344, "pid": 2338706, "tid": 2338706, "ts": 6345940156433.061, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940156458.812, "dur": 10.404, + "args": { + "External id": 985804,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940156461.912, "dur": 6.853, + "args": { + "External id": 985805,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940156630.328, "dur": 122.734, + "args": { + "External id": 985806,"Sequence number": 10552576, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18706 + } + }, + { + "ph": "s", "id": 343, "pid": 2338706, "tid": 2338706, "ts": 6345940156630.328, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940156644.610, "dur": 8.760, + "args": { + "External id": 985807,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940156647.643, "dur": 5.307, + "args": { + "External id": 985808,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338706, "tid": 2338706, + "ts": 6345940156786.312, "dur": 206.555, + "args": { + "External id": 985809,"Sequence number": 10552577, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18709 + } + }, + { + "ph": "s", "id": 342, "pid": 2338706, "tid": 2338706, "ts": 6345940156786.312, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345940156819.814, "dur": 144.538, + "args": { + "External id": 985810,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940156872.735, "dur": 7.604, + "args": { + "External id": 985811,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940156875.840, "dur": 4.056, + "args": { + "External id": 985812,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940156883.300, "dur": 3.701, + "args": { + "External id": 985813,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940156896.224, "dur": 1.281, + "args": { + "External id": 985814,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940156900.737, "dur": 4.773, + "args": { + "External id": 985815,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2338706, + "ts": 6345940156976.898, "dur": 4.736, + "args": { + "External id": 985816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940156998.992, "dur": 6.379, + "args": { + "External id": 985817,"Sequence number": 10552578, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940157000.877, "dur": 4.187, + "args": { + "External id": 985818,"Sequence number": 10552578, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18718 + } + }, + { + "ph": "s", "id": 341, "pid": 2338706, "tid": 2338706, "ts": 6345940157000.877, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940157041.818, "dur": 179.509, + "args": { + "External id": 985819,"Sequence number": 10552579, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940157043.518, "dur": 11.803, + "args": { + "External id": 985820,"Sequence number": 10552579, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18720 + } + }, + { + "ph": "s", "id": 340, "pid": 2338706, "tid": 2338706, "ts": 6345940157043.518, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940157047.148, "dur": 6.208, + "args": { + "External id": 985821,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940157050.785, "dur": 2.122, + "args": { + "External id": 985822,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940157097.711, "dur": 123.314, + "args": { + "External id": 985823,"Sequence number": 10552580, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940157100.446, "dur": 6.299, + "args": { + "External id": 985824,"Sequence number": 10552580, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940157101.751, "dur": 4.770, + "args": { + "External id": 985825,"Sequence number": 10552580, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18725 + } + }, + { + "ph": "s", "id": 339, "pid": 2338706, "tid": 2338706, "ts": 6345940157101.751, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940157107.971, "dur": 102.473, + "args": { + "External id": 985826,"Sequence number": 10552581, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18726 + } + }, + { + "ph": "s", "id": 338, "pid": 2338706, "tid": 2338706, "ts": 6345940157107.971, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940157213.280, "dur": 6.776, + "args": { + "External id": 985827,"Sequence number": 10552582, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18727 + } + }, + { + "ph": "s", "id": 337, "pid": 2338706, "tid": 2338706, "ts": 6345940157213.280, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940157269.593, "dur": 265.741, + "args": { + "External id": 985828,"Sequence number": 10552583, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18728 + } + }, + { + "ph": "s", "id": 336, "pid": 2338706, "tid": 2338706, "ts": 6345940157269.593, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940157295.224, "dur": 3.273, + "args": { + "External id": 985829,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940157296.337, "dur": 1.877, + "args": { + "External id": 985830,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338706, "tid": 2338706, + "ts": 6345940157303.560, "dur": 3.917, + "args": { + "External id": 985831,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940157304.961, "dur": 2.397, + "args": { + "External id": 985832,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940157306.366, "dur": 0.852, + "args": { + "External id": 985833,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940157316.836, "dur": 8.681, + "args": { + "External id": 985834,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940157319.494, "dur": 5.630, + "args": { + "External id": 985835,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940157332.950, "dur": 3.421, + "args": { + "External id": 985836,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940157340.941, "dur": 5.160, + "args": { + "External id": 985837,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940157510.393, "dur": 3.866, + "args": { + "External id": 985838,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940157511.903, "dur": 2.103, + "args": { + "External id": 985839,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940157518.774, "dur": 2.144, + "args": { + "External id": 985840,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940157520.043, "dur": 0.761, + "args": { + "External id": 985841,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940157556.635, "dur": 115.818, + "args": { + "External id": 985842,"Sequence number": 10552584, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940157557.953, "dur": 9.164, + "args": { + "External id": 985843,"Sequence number": 10552584, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18743 + } + }, + { + "ph": "s", "id": 335, "pid": 2338706, "tid": 2338706, "ts": 6345940157557.953, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940157561.026, "dur": 4.588, + "args": { + "External id": 985844,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940157563.759, "dur": 1.560, + "args": { + "External id": 985845,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940157568.439, "dur": 103.714, + "args": { + "External id": 985846,"Sequence number": 10552585, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940157572.381, "dur": 6.472, + "args": { + "External id": 985847,"Sequence number": 10552585, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940157573.332, "dur": 5.327, + "args": { + "External id": 985848,"Sequence number": 10552585, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18748 + } + }, + { + "ph": "s", "id": 334, "pid": 2338706, "tid": 2338706, "ts": 6345940157573.332, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940157579.748, "dur": 83.610, + "args": { + "External id": 985849,"Sequence number": 10552586, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18749 + } + }, + { + "ph": "s", "id": 333, "pid": 2338706, "tid": 2338706, "ts": 6345940157579.748, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940157665.911, "dur": 5.566, + "args": { + "External id": 985850,"Sequence number": 10552587, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18750 + } + }, + { + "ph": "s", "id": 332, "pid": 2338706, "tid": 2338706, "ts": 6345940157665.911, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940157682.177, "dur": 77.831, + "args": { + "External id": 985851,"Sequence number": 10552588, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940157683.121, "dur": 7.505, + "args": { + "External id": 985852,"Sequence number": 10552588, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18752 + } + }, + { + "ph": "s", "id": 331, "pid": 2338706, "tid": 2338706, "ts": 6345940157683.121, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940157685.646, "dur": 3.326, + "args": { + "External id": 985853,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940157687.690, "dur": 1.154, + "args": { + "External id": 985854,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940157691.228, "dur": 68.453, + "args": { + "External id": 985855,"Sequence number": 10552589, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940157692.533, "dur": 7.459, + "args": { + "External id": 985856,"Sequence number": 10552589, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940157693.384, "dur": 6.397, + "args": { + "External id": 985857,"Sequence number": 10552589, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18757 + } + }, + { + "ph": "s", "id": 330, "pid": 2338706, "tid": 2338706, "ts": 6345940157693.384, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940157700.562, "dur": 53.315, + "args": { + "External id": 985858,"Sequence number": 10552590, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18758 + } + }, + { + "ph": "s", "id": 329, "pid": 2338706, "tid": 2338706, "ts": 6345940157700.562, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940157756.593, "dur": 2.612, + "args": { + "External id": 985859,"Sequence number": 10552591, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18759 + } + }, + { + "ph": "s", "id": 328, "pid": 2338706, "tid": 2338706, "ts": 6345940157756.593, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940157783.555, "dur": 177.294, + "args": { + "External id": 985860,"Sequence number": 10552592, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18760 + } + }, + { + "ph": "s", "id": 327, "pid": 2338706, "tid": 2338706, "ts": 6345940157783.555, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940157827.957, "dur": 5.684, + "args": { + "External id": 985861,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940157874.901, "dur": 71.839, + "args": { + "External id": 985862,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940157876.121, "dur": 5.220, + "args": { + "External id": 985863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940157877.426, "dur": 2.737, + "args": { + "External id": 985864,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940157879.193, "dur": 0.674, + "args": { + "External id": 985865,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940157882.260, "dur": 64.104, + "args": { + "External id": 985866,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940157884.046, "dur": 2.580, + "args": { + "External id": 985867,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940157885.132, "dur": 1.329, + "args": { + "External id": 985868,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940157887.123, "dur": 54.597, + "args": { + "External id": 985869,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940157944.282, "dur": 1.330, + "args": { + "External id": 985870,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2338706, + "ts": 6345940157970.056, "dur": 29.841, + "args": { + "External id": 985871,"Sequence number": 10552593, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18771 + } + }, + { + "ph": "s", "id": 326, "pid": 2338706, "tid": 2338706, "ts": 6345940157970.056, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940158106.528, "dur": 230.506, + "args": { + "External id": 985872,"Sequence number": 10552594, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18772 + } + }, + { + "ph": "s", "id": 325, "pid": 2338706, "tid": 2338706, "ts": 6345940158106.528, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940158131.807, "dur": 4.615, + "args": { + "External id": 985873,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940158132.974, "dur": 3.062, + "args": { + "External id": 985874,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940158146.067, "dur": 9.990, + "args": { + "External id": 985875,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940158149.826, "dur": 5.642, + "args": { + "External id": 985876,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940158163.168, "dur": 7.097, + "args": { + "External id": 985877,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940158318.762, "dur": 3.721, + "args": { + "External id": 985878,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940158320.191, "dur": 2.042, + "args": { + "External id": 985879,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940158361.442, "dur": 104.900, + "args": { + "External id": 985880,"Sequence number": 10552595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940158362.601, "dur": 8.949, + "args": { + "External id": 985881,"Sequence number": 10552595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18781 + } + }, + { + "ph": "s", "id": 324, "pid": 2338706, "tid": 2338706, "ts": 6345940158362.601, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940158365.276, "dur": 4.559, + "args": { + "External id": 985882,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940158367.872, "dur": 1.708, + "args": { + "External id": 985883,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940158373.042, "dur": 92.897, + "args": { + "External id": 985884,"Sequence number": 10552596, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940158375.252, "dur": 4.604, + "args": { + "External id": 985885,"Sequence number": 10552596, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940158376.257, "dur": 3.396, + "args": { + "External id": 985886,"Sequence number": 10552596, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18786 + } + }, + { + "ph": "s", "id": 323, "pid": 2338706, "tid": 2338706, "ts": 6345940158376.257, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940158380.761, "dur": 77.341, + "args": { + "External id": 985887,"Sequence number": 10552597, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18787 + } + }, + { + "ph": "s", "id": 322, "pid": 2338706, "tid": 2338706, "ts": 6345940158380.761, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940158460.812, "dur": 4.204, + "args": { + "External id": 985888,"Sequence number": 10552598, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18788 + } + }, + { + "ph": "s", "id": 321, "pid": 2338706, "tid": 2338706, "ts": 6345940158460.812, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940158476.282, "dur": 78.380, + "args": { + "External id": 985889,"Sequence number": 10552599, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940158476.833, "dur": 11.248, + "args": { + "External id": 985890,"Sequence number": 10552599, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18790 + } + }, + { + "ph": "s", "id": 320, "pid": 2338706, "tid": 2338706, "ts": 6345940158476.833, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940158483.274, "dur": 3.414, + "args": { + "External id": 985891,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940158485.261, "dur": 1.256, + "args": { + "External id": 985892,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940158488.746, "dur": 65.500, + "args": { + "External id": 985893,"Sequence number": 10552600, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940158490.059, "dur": 6.928, + "args": { + "External id": 985894,"Sequence number": 10552600, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940158493.396, "dur": 3.435, + "args": { + "External id": 985895,"Sequence number": 10552600, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18795 + } + }, + { + "ph": "s", "id": 319, "pid": 2338706, "tid": 2338706, "ts": 6345940158493.396, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940158497.671, "dur": 48.642, + "args": { + "External id": 985896,"Sequence number": 10552601, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18796 + } + }, + { + "ph": "s", "id": 318, "pid": 2338706, "tid": 2338706, "ts": 6345940158497.671, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940158548.302, "dur": 5.380, + "args": { + "External id": 985897,"Sequence number": 10552602, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18797 + } + }, + { + "ph": "s", "id": 317, "pid": 2338706, "tid": 2338706, "ts": 6345940158548.302, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940158563.269, "dur": 61.867, + "args": { + "External id": 985898,"Sequence number": 10552603, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940158563.796, "dur": 6.486, + "args": { + "External id": 985899,"Sequence number": 10552603, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18799 + } + }, + { + "ph": "s", "id": 316, "pid": 2338706, "tid": 2338706, "ts": 6345940158563.796, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940158565.989, "dur": 2.882, + "args": { + "External id": 985900,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940158567.891, "dur": 0.816, + "args": { + "External id": 985901,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940158571.225, "dur": 53.649, + "args": { + "External id": 985902,"Sequence number": 10552604, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940158572.332, "dur": 5.357, + "args": { + "External id": 985903,"Sequence number": 10552604, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940158573.236, "dur": 4.269, + "args": { + "External id": 985904,"Sequence number": 10552604, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18804 + } + }, + { + "ph": "s", "id": 315, "pid": 2338706, "tid": 2338706, "ts": 6345940158573.236, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940158578.417, "dur": 40.946, + "args": { + "External id": 985905,"Sequence number": 10552605, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18805 + } + }, + { + "ph": "s", "id": 314, "pid": 2338706, "tid": 2338706, "ts": 6345940158578.417, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940158621.691, "dur": 2.600, + "args": { + "External id": 985906,"Sequence number": 10552606, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18806 + } + }, + { + "ph": "s", "id": 313, "pid": 2338706, "tid": 2338706, "ts": 6345940158621.691, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940158642.852, "dur": 4.860, + "args": { + "External id": 985907,"Sequence number": 10552607, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940158644.203, "dur": 3.333, + "args": { + "External id": 985908,"Sequence number": 10552607, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18808 + } + }, + { + "ph": "s", "id": 312, "pid": 2338706, "tid": 2338706, "ts": 6345940158644.203, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940158655.857, "dur": 4.898, + "args": { + "External id": 985909,"Sequence number": 10552608, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940158657.367, "dur": 3.229, + "args": { + "External id": 985910,"Sequence number": 10552608, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18810 + } + }, + { + "ph": "s", "id": 311, "pid": 2338706, "tid": 2338706, "ts": 6345940158657.367, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940158665.891, "dur": 6.918, + "args": { + "External id": 985911,"Sequence number": 10552609, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940158667.536, "dur": 5.114, + "args": { + "External id": 985912,"Sequence number": 10552609, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18812 + } + }, + { + "ph": "s", "id": 310, "pid": 2338706, "tid": 2338706, "ts": 6345940158667.536, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940158705.854, "dur": 172.789, + "args": { + "External id": 985913,"Sequence number": 10552610, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18813 + } + }, + { + "ph": "s", "id": 309, "pid": 2338706, "tid": 2338706, "ts": 6345940158705.854, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940158727.282, "dur": 8.235, + "args": { + "External id": 985914,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940158729.962, "dur": 5.076, + "args": { + "External id": 985915,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940158893.303, "dur": 145.874, + "args": { + "External id": 985916,"Sequence number": 10552611, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18816 + } + }, + { + "ph": "s", "id": 308, "pid": 2338706, "tid": 2338706, "ts": 6345940158893.303, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940158907.787, "dur": 7.990, + "args": { + "External id": 985917,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940158910.239, "dur": 5.050, + "args": { + "External id": 985918,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338706, "tid": 2338706, + "ts": 6345940159113.551, "dur": 218.954, + "args": { + "External id": 985919,"Sequence number": 10552612, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18819 + } + }, + { + "ph": "s", "id": 307, "pid": 2338706, "tid": 2338706, "ts": 6345940159113.551, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345940159147.456, "dur": 153.046, + "args": { + "External id": 985920,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940159203.698, "dur": 10.274, + "args": { + "External id": 985921,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940159207.180, "dur": 6.081, + "args": { + "External id": 985922,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940159217.088, "dur": 7.468, + "args": { + "External id": 985923,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940159226.645, "dur": 1.117, + "args": { + "External id": 985924,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940159231.032, "dur": 5.046, + "args": { + "External id": 985925,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2338706, + "ts": 6345940159314.164, "dur": 5.444, + "args": { + "External id": 985926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940159339.056, "dur": 7.563, + "args": { + "External id": 985927,"Sequence number": 10552613, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940159340.784, "dur": 5.654, + "args": { + "External id": 985928,"Sequence number": 10552613, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18828 + } + }, + { + "ph": "s", "id": 306, "pid": 2338706, "tid": 2338706, "ts": 6345940159340.784, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940159360.864, "dur": 132.916, + "args": { + "External id": 985929,"Sequence number": 10552614, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940159362.122, "dur": 12.602, + "args": { + "External id": 985930,"Sequence number": 10552614, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18830 + } + }, + { + "ph": "s", "id": 305, "pid": 2338706, "tid": 2338706, "ts": 6345940159362.122, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940159368.292, "dur": 4.834, + "args": { + "External id": 985931,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940159371.247, "dur": 1.618, + "args": { + "External id": 985932,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940159376.196, "dur": 117.236, + "args": { + "External id": 985933,"Sequence number": 10552615, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940159378.462, "dur": 5.489, + "args": { + "External id": 985934,"Sequence number": 10552615, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940159379.752, "dur": 4.021, + "args": { + "External id": 985935,"Sequence number": 10552615, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18835 + } + }, + { + "ph": "s", "id": 304, "pid": 2338706, "tid": 2338706, "ts": 6345940159379.752, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940159385.254, "dur": 99.109, + "args": { + "External id": 985936,"Sequence number": 10552616, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18836 + } + }, + { + "ph": "s", "id": 303, "pid": 2338706, "tid": 2338706, "ts": 6345940159385.254, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940159487.288, "dur": 5.136, + "args": { + "External id": 985937,"Sequence number": 10552617, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18837 + } + }, + { + "ph": "s", "id": 302, "pid": 2338706, "tid": 2338706, "ts": 6345940159487.288, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940159532.984, "dur": 235.804, + "args": { + "External id": 985938,"Sequence number": 10552618, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18838 + } + }, + { + "ph": "s", "id": 301, "pid": 2338706, "tid": 2338706, "ts": 6345940159532.984, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940159554.605, "dur": 3.104, + "args": { + "External id": 985939,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940159555.847, "dur": 1.725, + "args": { + "External id": 985940,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338706, "tid": 2338706, + "ts": 6345940159562.701, "dur": 3.916, + "args": { + "External id": 985941,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940159564.079, "dur": 2.409, + "args": { + "External id": 985942,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940159565.348, "dur": 1.020, + "args": { + "External id": 985943,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940159575.270, "dur": 10.445, + "args": { + "External id": 985944,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940159577.690, "dur": 7.597, + "args": { + "External id": 985945,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940159592.615, "dur": 2.729, + "args": { + "External id": 985946,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940159599.499, "dur": 3.383, + "args": { + "External id": 985947,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940159748.174, "dur": 3.525, + "args": { + "External id": 985948,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940159749.507, "dur": 1.966, + "args": { + "External id": 985949,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940159754.866, "dur": 2.053, + "args": { + "External id": 985950,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940159755.916, "dur": 0.908, + "args": { + "External id": 985951,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940159787.100, "dur": 97.993, + "args": { + "External id": 985952,"Sequence number": 10552619, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940159788.378, "dur": 8.556, + "args": { + "External id": 985953,"Sequence number": 10552619, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18853 + } + }, + { + "ph": "s", "id": 300, "pid": 2338706, "tid": 2338706, "ts": 6345940159788.378, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940159791.457, "dur": 4.163, + "args": { + "External id": 985954,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940159793.580, "dur": 1.673, + "args": { + "External id": 985955,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940159798.018, "dur": 86.704, + "args": { + "External id": 985956,"Sequence number": 10552620, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940159799.766, "dur": 4.316, + "args": { + "External id": 985957,"Sequence number": 10552620, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940159800.434, "dur": 3.492, + "args": { + "External id": 985958,"Sequence number": 10552620, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18858 + } + }, + { + "ph": "s", "id": 299, "pid": 2338706, "tid": 2338706, "ts": 6345940159800.434, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940159804.893, "dur": 72.751, + "args": { + "External id": 985959,"Sequence number": 10552621, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18859 + } + }, + { + "ph": "s", "id": 298, "pid": 2338706, "tid": 2338706, "ts": 6345940159804.893, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940159880.179, "dur": 3.908, + "args": { + "External id": 985960,"Sequence number": 10552622, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18860 + } + }, + { + "ph": "s", "id": 297, "pid": 2338706, "tid": 2338706, "ts": 6345940159880.179, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940159900.180, "dur": 80.443, + "args": { + "External id": 985961,"Sequence number": 10552623, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940159900.848, "dur": 8.819, + "args": { + "External id": 985962,"Sequence number": 10552623, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18862 + } + }, + { + "ph": "s", "id": 296, "pid": 2338706, "tid": 2338706, "ts": 6345940159900.848, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940159902.886, "dur": 5.559, + "args": { + "External id": 985963,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940159904.712, "dur": 3.494, + "args": { + "External id": 985964,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940159910.343, "dur": 69.957, + "args": { + "External id": 985965,"Sequence number": 10552624, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940159914.013, "dur": 5.760, + "args": { + "External id": 985966,"Sequence number": 10552624, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940159915.107, "dur": 4.520, + "args": { + "External id": 985967,"Sequence number": 10552624, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18867 + } + }, + { + "ph": "s", "id": 295, "pid": 2338706, "tid": 2338706, "ts": 6345940159915.107, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940159920.424, "dur": 52.449, + "args": { + "External id": 985968,"Sequence number": 10552625, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18868 + } + }, + { + "ph": "s", "id": 294, "pid": 2338706, "tid": 2338706, "ts": 6345940159920.424, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940159975.451, "dur": 4.030, + "args": { + "External id": 985969,"Sequence number": 10552626, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18869 + } + }, + { + "ph": "s", "id": 293, "pid": 2338706, "tid": 2338706, "ts": 6345940159975.451, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940160003.411, "dur": 250.242, + "args": { + "External id": 985970,"Sequence number": 10552627, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18870 + } + }, + { + "ph": "s", "id": 292, "pid": 2338706, "tid": 2338706, "ts": 6345940160003.411, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940160106.187, "dur": 7.233, + "args": { + "External id": 985971,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940160154.549, "dur": 81.859, + "args": { + "External id": 985972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940160155.539, "dur": 7.349, + "args": { + "External id": 985973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940160158.078, "dur": 3.427, + "args": { + "External id": 985974,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940160160.210, "dur": 1.054, + "args": { + "External id": 985975,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940160163.884, "dur": 72.146, + "args": { + "External id": 985976,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940160165.742, "dur": 3.076, + "args": { + "External id": 985977,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940160166.841, "dur": 1.784, + "args": { + "External id": 985978,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940160171.709, "dur": 56.634, + "args": { + "External id": 985979,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940160231.046, "dur": 4.127, + "args": { + "External id": 985980,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2338706, + "ts": 6345940160264.741, "dur": 30.940, + "args": { + "External id": 985981,"Sequence number": 10552628, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18881 + } + }, + { + "ph": "s", "id": 291, "pid": 2338706, "tid": 2338706, "ts": 6345940160264.741, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940160340.893, "dur": 218.871, + "args": { + "External id": 985982,"Sequence number": 10552629, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18882 + } + }, + { + "ph": "s", "id": 290, "pid": 2338706, "tid": 2338706, "ts": 6345940160340.893, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940160363.736, "dur": 3.327, + "args": { + "External id": 985983,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940160364.597, "dur": 2.164, + "args": { + "External id": 985984,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940160376.766, "dur": 7.813, + "args": { + "External id": 985985,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940160379.550, "dur": 4.502, + "args": { + "External id": 985986,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940160392.330, "dur": 5.434, + "args": { + "External id": 985987,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940160543.549, "dur": 3.908, + "args": { + "External id": 985988,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940160545.213, "dur": 1.982, + "args": { + "External id": 985989,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940160581.304, "dur": 99.186, + "args": { + "External id": 985990,"Sequence number": 10552630, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940160582.487, "dur": 8.846, + "args": { + "External id": 985991,"Sequence number": 10552630, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18891 + } + }, + { + "ph": "s", "id": 289, "pid": 2338706, "tid": 2338706, "ts": 6345940160582.487, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940160585.448, "dur": 4.354, + "args": { + "External id": 985992,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940160587.947, "dur": 1.575, + "args": { + "External id": 985993,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940160592.382, "dur": 87.708, + "args": { + "External id": 985994,"Sequence number": 10552631, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940160593.942, "dur": 5.813, + "args": { + "External id": 985995,"Sequence number": 10552631, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940160594.881, "dur": 4.610, + "args": { + "External id": 985996,"Sequence number": 10552631, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18896 + } + }, + { + "ph": "s", "id": 288, "pid": 2338706, "tid": 2338706, "ts": 6345940160594.881, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940160600.823, "dur": 71.479, + "args": { + "External id": 985997,"Sequence number": 10552632, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18897 + } + }, + { + "ph": "s", "id": 287, "pid": 2338706, "tid": 2338706, "ts": 6345940160600.823, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940160675.267, "dur": 4.082, + "args": { + "External id": 985998,"Sequence number": 10552633, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18898 + } + }, + { + "ph": "s", "id": 286, "pid": 2338706, "tid": 2338706, "ts": 6345940160675.267, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940160690.023, "dur": 68.290, + "args": { + "External id": 985999,"Sequence number": 10552634, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940160690.559, "dur": 6.185, + "args": { + "External id": 986000,"Sequence number": 10552634, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18900 + } + }, + { + "ph": "s", "id": 285, "pid": 2338706, "tid": 2338706, "ts": 6345940160690.559, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940160692.901, "dur": 2.495, + "args": { + "External id": 986001,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940160694.363, "dur": 0.849, + "args": { + "External id": 986002,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940160697.518, "dur": 60.559, + "args": { + "External id": 986003,"Sequence number": 10552635, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940160699.142, "dur": 4.718, + "args": { + "External id": 986004,"Sequence number": 10552635, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940160700.077, "dur": 3.592, + "args": { + "External id": 986005,"Sequence number": 10552635, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18905 + } + }, + { + "ph": "s", "id": 284, "pid": 2338706, "tid": 2338706, "ts": 6345940160700.077, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940160704.401, "dur": 47.042, + "args": { + "External id": 986006,"Sequence number": 10552636, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18906 + } + }, + { + "ph": "s", "id": 283, "pid": 2338706, "tid": 2338706, "ts": 6345940160704.401, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940160753.577, "dur": 4.039, + "args": { + "External id": 986007,"Sequence number": 10552637, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18907 + } + }, + { + "ph": "s", "id": 282, "pid": 2338706, "tid": 2338706, "ts": 6345940160753.577, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940160765.662, "dur": 68.456, + "args": { + "External id": 986008,"Sequence number": 10552638, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940160766.269, "dur": 6.619, + "args": { + "External id": 986009,"Sequence number": 10552638, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18909 + } + }, + { + "ph": "s", "id": 281, "pid": 2338706, "tid": 2338706, "ts": 6345940160766.269, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940160768.998, "dur": 2.476, + "args": { + "External id": 986010,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940160770.383, "dur": 0.938, + "args": { + "External id": 986011,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940160773.678, "dur": 60.200, + "args": { + "External id": 986012,"Sequence number": 10552639, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940160774.892, "dur": 6.183, + "args": { + "External id": 986013,"Sequence number": 10552639, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940160776.004, "dur": 4.892, + "args": { + "External id": 986014,"Sequence number": 10552639, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18914 + } + }, + { + "ph": "s", "id": 280, "pid": 2338706, "tid": 2338706, "ts": 6345940160776.004, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940160781.608, "dur": 46.748, + "args": { + "External id": 986015,"Sequence number": 10552640, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18915 + } + }, + { + "ph": "s", "id": 279, "pid": 2338706, "tid": 2338706, "ts": 6345940160781.608, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940160830.504, "dur": 2.774, + "args": { + "External id": 986016,"Sequence number": 10552641, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18916 + } + }, + { + "ph": "s", "id": 278, "pid": 2338706, "tid": 2338706, "ts": 6345940160830.504, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940160851.803, "dur": 4.683, + "args": { + "External id": 986017,"Sequence number": 10552642, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940160852.789, "dur": 3.520, + "args": { + "External id": 986018,"Sequence number": 10552642, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18918 + } + }, + { + "ph": "s", "id": 277, "pid": 2338706, "tid": 2338706, "ts": 6345940160852.789, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940160864.392, "dur": 5.125, + "args": { + "External id": 986019,"Sequence number": 10552643, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940160865.663, "dur": 3.668, + "args": { + "External id": 986020,"Sequence number": 10552643, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18920 + } + }, + { + "ph": "s", "id": 276, "pid": 2338706, "tid": 2338706, "ts": 6345940160865.663, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940160874.229, "dur": 4.585, + "args": { + "External id": 986021,"Sequence number": 10552644, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940160875.359, "dur": 3.288, + "args": { + "External id": 986022,"Sequence number": 10552644, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18922 + } + }, + { + "ph": "s", "id": 275, "pid": 2338706, "tid": 2338706, "ts": 6345940160875.359, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940160909.818, "dur": 241.313, + "args": { + "External id": 986023,"Sequence number": 10552645, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18923 + } + }, + { + "ph": "s", "id": 274, "pid": 2338706, "tid": 2338706, "ts": 6345940160909.818, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940160931.063, "dur": 8.611, + "args": { + "External id": 986024,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940160934.133, "dur": 5.082, + "args": { + "External id": 986025,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940161172.532, "dur": 135.766, + "args": { + "External id": 986026,"Sequence number": 10552646, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18926 + } + }, + { + "ph": "s", "id": 273, "pid": 2338706, "tid": 2338706, "ts": 6345940161172.532, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940161190.356, "dur": 10.232, + "args": { + "External id": 986027,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940161193.432, "dur": 6.718, + "args": { + "External id": 986028,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338706, "tid": 2338706, + "ts": 6345940161341.579, "dur": 213.015, + "args": { + "External id": 986029,"Sequence number": 10552647, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18929 + } + }, + { + "ph": "s", "id": 272, "pid": 2338706, "tid": 2338706, "ts": 6345940161341.579, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345940161373.110, "dur": 148.804, + "args": { + "External id": 986030,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940161432.139, "dur": 8.532, + "args": { + "External id": 986031,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940161435.213, "dur": 4.977, + "args": { + "External id": 986032,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940161444.575, "dur": 4.378, + "args": { + "External id": 986033,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940161450.315, "dur": 1.439, + "args": { + "External id": 986034,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940161454.769, "dur": 4.500, + "args": { + "External id": 986035,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338706, "tid": 2338706, + "ts": 6345940161535.290, "dur": 5.430, + "args": { + "External id": 986036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940161560.609, "dur": 7.605, + "args": { + "External id": 986037,"Sequence number": 10552648, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940161562.606, "dur": 5.381, + "args": { + "External id": 986038,"Sequence number": 10552648, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18938 + } + }, + { + "ph": "s", "id": 271, "pid": 2338706, "tid": 2338706, "ts": 6345940161562.606, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940161583.115, "dur": 123.564, + "args": { + "External id": 986039,"Sequence number": 10552649, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940161584.205, "dur": 9.202, + "args": { + "External id": 986040,"Sequence number": 10552649, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18940 + } + }, + { + "ph": "s", "id": 270, "pid": 2338706, "tid": 2338706, "ts": 6345940161584.205, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940161587.428, "dur": 4.717, + "args": { + "External id": 986041,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940161590.155, "dur": 1.683, + "args": { + "External id": 986042,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940161594.796, "dur": 111.587, + "args": { + "External id": 986043,"Sequence number": 10552650, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940161597.220, "dur": 4.498, + "args": { + "External id": 986044,"Sequence number": 10552650, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940161598.080, "dur": 3.420, + "args": { + "External id": 986045,"Sequence number": 10552650, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18945 + } + }, + { + "ph": "s", "id": 269, "pid": 2338706, "tid": 2338706, "ts": 6345940161598.080, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940161602.890, "dur": 96.074, + "args": { + "External id": 986046,"Sequence number": 10552651, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18946 + } + }, + { + "ph": "s", "id": 268, "pid": 2338706, "tid": 2338706, "ts": 6345940161602.890, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940161701.388, "dur": 4.136, + "args": { + "External id": 986047,"Sequence number": 10552652, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18947 + } + }, + { + "ph": "s", "id": 267, "pid": 2338706, "tid": 2338706, "ts": 6345940161701.388, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940161744.982, "dur": 241.139, + "args": { + "External id": 986048,"Sequence number": 10552653, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18948 + } + }, + { + "ph": "s", "id": 266, "pid": 2338706, "tid": 2338706, "ts": 6345940161744.982, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940161765.504, "dur": 9.452, + "args": { + "External id": 986049,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940161772.747, "dur": 1.861, + "args": { + "External id": 986050,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338706, "tid": 2338706, + "ts": 6345940161779.797, "dur": 6.648, + "args": { + "External id": 986051,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940161780.887, "dur": 5.432, + "args": { + "External id": 986052,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940161782.594, "dur": 3.600, + "args": { + "External id": 986053,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940161794.780, "dur": 10.832, + "args": { + "External id": 986054,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940161800.222, "dur": 5.070, + "args": { + "External id": 986055,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940161812.434, "dur": 3.306, + "args": { + "External id": 986056,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940161820.124, "dur": 3.924, + "args": { + "External id": 986057,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940161963.061, "dur": 3.332, + "args": { + "External id": 986058,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940161964.444, "dur": 1.715, + "args": { + "External id": 986059,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940161969.255, "dur": 4.586, + "args": { + "External id": 986060,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940161972.680, "dur": 1.048, + "args": { + "External id": 986061,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940162030.028, "dur": 174.329, + "args": { + "External id": 986062,"Sequence number": 10552654, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940162031.799, "dur": 10.930, + "args": { + "External id": 986063,"Sequence number": 10552654, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18963 + } + }, + { + "ph": "s", "id": 265, "pid": 2338706, "tid": 2338706, "ts": 6345940162031.799, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940162035.619, "dur": 5.164, + "args": { + "External id": 986064,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940162038.476, "dur": 1.817, + "args": { + "External id": 986065,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940162043.773, "dur": 160.329, + "args": { + "External id": 986066,"Sequence number": 10552655, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940162045.794, "dur": 7.519, + "args": { + "External id": 986067,"Sequence number": 10552655, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940162049.112, "dur": 4.031, + "args": { + "External id": 986068,"Sequence number": 10552655, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18968 + } + }, + { + "ph": "s", "id": 264, "pid": 2338706, "tid": 2338706, "ts": 6345940162049.112, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940162054.171, "dur": 139.227, + "args": { + "External id": 986069,"Sequence number": 10552656, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18969 + } + }, + { + "ph": "s", "id": 263, "pid": 2338706, "tid": 2338706, "ts": 6345940162054.171, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940162197.252, "dur": 5.992, + "args": { + "External id": 986070,"Sequence number": 10552657, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18970 + } + }, + { + "ph": "s", "id": 262, "pid": 2338706, "tid": 2338706, "ts": 6345940162197.252, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940162217.643, "dur": 105.663, + "args": { + "External id": 986071,"Sequence number": 10552658, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940162218.519, "dur": 12.597, + "args": { + "External id": 986072,"Sequence number": 10552658, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18972 + } + }, + { + "ph": "s", "id": 261, "pid": 2338706, "tid": 2338706, "ts": 6345940162218.519, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940162224.634, "dur": 4.406, + "args": { + "External id": 986073,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940162226.762, "dur": 1.387, + "args": { + "External id": 986074,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940162232.208, "dur": 90.886, + "args": { + "External id": 986075,"Sequence number": 10552659, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940162233.528, "dur": 9.492, + "args": { + "External id": 986076,"Sequence number": 10552659, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940162234.845, "dur": 7.954, + "args": { + "External id": 986077,"Sequence number": 10552659, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18977 + } + }, + { + "ph": "s", "id": 260, "pid": 2338706, "tid": 2338706, "ts": 6345940162234.845, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940162244.220, "dur": 72.701, + "args": { + "External id": 986078,"Sequence number": 10552660, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18978 + } + }, + { + "ph": "s", "id": 259, "pid": 2338706, "tid": 2338706, "ts": 6345940162244.220, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940162319.319, "dur": 3.300, + "args": { + "External id": 986079,"Sequence number": 10552661, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18979 + } + }, + { + "ph": "s", "id": 258, "pid": 2338706, "tid": 2338706, "ts": 6345940162319.319, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940162349.134, "dur": 187.882, + "args": { + "External id": 986080,"Sequence number": 10552662, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18980 + } + }, + { + "ph": "s", "id": 257, "pid": 2338706, "tid": 2338706, "ts": 6345940162349.134, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940162398.347, "dur": 7.160, + "args": { + "External id": 986081,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940162447.587, "dur": 73.773, + "args": { + "External id": 986082,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940162448.779, "dur": 6.575, + "args": { + "External id": 986083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940162450.537, "dur": 3.657, + "args": { + "External id": 986084,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940162453.087, "dur": 0.841, + "args": { + "External id": 986085,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940162456.646, "dur": 64.424, + "args": { + "External id": 986086,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338706, "tid": 2338706, + "ts": 6345940162458.144, "dur": 2.942, + "args": { + "External id": 986087,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940162459.614, "dur": 1.327, + "args": { + "External id": 986088,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940162461.818, "dur": 54.563, + "args": { + "External id": 986089,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338706, "tid": 2338706, + "ts": 6345940162519.220, "dur": 1.177, + "args": { + "External id": 986090,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2338706, + "ts": 6345940162548.707, "dur": 33.802, + "args": { + "External id": 986091,"Sequence number": 10552663, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18991 + } + }, + { + "ph": "s", "id": 256, "pid": 2338706, "tid": 2338706, "ts": 6345940162548.707, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338706, "tid": 2338706, + "ts": 6345940162606.331, "dur": 55.568, + "args": { + "External id": 986092,"Sequence number": 10552664, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "-2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 18992 + } + }, + { + "ph": "s", "id": 255, "pid": 2338706, "tid": 2338706, "ts": 6345940162606.331, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2338706, "tid": 2338706, + "ts": 6345940162617.001, "dur": 37.842, + "args": { + "External id": 986093,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 18993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940162656.746, "dur": 3.302, + "args": { + "External id": 986094,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 16384], []], "Ev Idx": 18994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338706, "tid": 2338706, + "ts": 6345940162703.021, "dur": 52.917, + "args": { + "External id": 986095,"Record function id": 0, "Ev Idx": 18995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 2338706, "tid": 2338706, + "ts": 6345940162758.155, "dur": 229.288, + "args": { + "External id": 986096,"Record function id": 0, "Ev Idx": 18996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940162801.182, "dur": 176.650, + "args": { + "External id": 986097,"Sequence number": 10552665, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [67108864, 16384, 4096, 1]], "Input Dims": [[4096], [8, 4096, 4, 4096]], "Ev Idx": 18997 + } + }, + { + "ph": "s", "id": 254, "pid": 2338706, "tid": 2338706, "ts": 6345940162801.182, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338706, "tid": 2338706, + "ts": 6345940162882.931, "dur": 48.715, + "args": { + "External id": 986098,"kernel_hash": "csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/sx/csxej2eqpdr6urgmnplkn2gxd6ebpciul5hudjchjoyne3jkpjo3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [131072, 4096], [4096], [131072], [], [], [], [], [], [], [], [], []], "Ev Idx": 18998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2338706, + "ts": 6345940163162.870, "dur": 54.536, + "args": { + "External id": 986099,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 18999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940163167.809, "dur": 8.014, + "args": { + "External id": 986100,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345940163179.306, "dur": 37.713, + "args": { + "External id": 986101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 19001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940163182.357, "dur": 34.215, + "args": { + "External id": 986102,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 19002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2338706, + "ts": 6345940163223.483, "dur": 20.520, + "args": { + "External id": 986103,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940163224.979, "dur": 2.662, + "args": { + "External id": 986104,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345940163228.557, "dur": 15.121, + "args": { + "External id": 986105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 19005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940163229.552, "dur": 13.638, + "args": { + "External id": 986106,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 19006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2338706, + "ts": 6345940163247.885, "dur": 19.598, + "args": { + "External id": 986107,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940163249.204, "dur": 2.718, + "args": { + "External id": 986108,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345940163252.883, "dur": 14.309, + "args": { + "External id": 986109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 19009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940163254.083, "dur": 12.700, + "args": { + "External id": 986110,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 19010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345940163280.817, "dur": 0.920, + "args": { + "External id": 986111,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 19011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338706, "tid": 2338706, + "ts": 6345940163290.867, "dur": 85.568, + "args": { + "External id": 986112,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "5", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 19012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163369.916, "dur": 3.938, + "args": { + "External id": 986113,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 19013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940163384.857, "dur": 10.277, + "args": { + "External id": 986114,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 19014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163389.969, "dur": 2.890, + "args": { + "External id": 986115,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 19015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940163396.554, "dur": 3.693, + "args": { + "External id": 986116,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 19016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163398.817, "dur": 0.514, + "args": { + "External id": 986117,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 19017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940163401.661, "dur": 3.363, + "args": { + "External id": 986118,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 19018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163403.830, "dur": 0.529, + "args": { + "External id": 986119,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 19019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940163409.177, "dur": 3.544, + "args": { + "External id": 986120,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 19020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163411.446, "dur": 0.568, + "args": { + "External id": 986121,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 19021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940163413.904, "dur": 3.952, + "args": { + "External id": 986122,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 19022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163416.251, "dur": 0.669, + "args": { + "External id": 986123,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 19023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940163419.057, "dur": 3.205, + "args": { + "External id": 986124,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4096, 4], [], [], [], []], "Ev Idx": 19024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163421.162, "dur": 0.422, + "args": { + "External id": 986125,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 19025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940163426.607, "dur": 5.658, + "args": { + "External id": 986126,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4096, 4], [], []], "Ev Idx": 19026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163430.717, "dur": 0.542, + "args": { + "External id": 986127,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 19027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940163439.907, "dur": 3.379, + "args": { + "External id": 986128,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 19028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163442.142, "dur": 0.415, + "args": { + "External id": 986129,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2338706, + "ts": 6345940163446.704, "dur": 10.225, + "args": { + "External id": 986130,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 19030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163452.334, "dur": 3.070, + "args": { + "External id": 986131,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940163458.164, "dur": 2.816, + "args": { + "External id": 986132,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 19032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163459.783, "dur": 0.521, + "args": { + "External id": 986133,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 19033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940163464.034, "dur": 10.024, + "args": { + "External id": 986134,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19034 + } + }, + { + "ph": "s", "id": 253, "pid": 2338706, "tid": 2338706, "ts": 6345940163464.034, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163471.277, "dur": 0.589, + "args": { + "External id": 986135,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940163475.159, "dur": 4.633, + "args": { + "External id": 986136,"Sequence number": 10552667, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19036 + } + }, + { + "ph": "s", "id": 252, "pid": 2338706, "tid": 2338706, "ts": 6345940163475.159, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163478.447, "dur": 0.415, + "args": { + "External id": 986137,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2338706, + "ts": 6345940163480.849, "dur": 7.622, + "args": { + "External id": 986138,"Sequence number": 10552668, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 19038 + } + }, + { + "ph": "s", "id": 251, "pid": 2338706, "tid": 2338706, "ts": 6345940163480.849, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163486.963, "dur": 0.403, + "args": { + "External id": 986139,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940163489.730, "dur": 5.259, + "args": { + "External id": 986140,"Sequence number": 10552669, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 19040 + } + }, + { + "ph": "s", "id": 250, "pid": 2338706, "tid": 2338706, "ts": 6345940163489.730, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163493.207, "dur": 0.851, + "args": { + "External id": 986141,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 19041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345940163500.011, "dur": 46.673, + "args": { + "External id": 986142,"Sequence number": 10552670, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345940163501.938, "dur": 44.511, + "args": { + "External id": 986143,"Sequence number": 10552670, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940163505.007, "dur": 8.431, + "args": { + "External id": 986144,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 19044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940163507.647, "dur": 5.152, + "args": { + "External id": 986145,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940163515.167, "dur": 30.632, + "args": { + "External id": 986146,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 19046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940163577.741, "dur": 6.083, + "args": { + "External id": 986147,"Sequence number": 10552670, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 19047 + } + }, + { + "ph": "s", "id": 249, "pid": 2338706, "tid": 2338706, "ts": 6345940163577.741, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940163586.643, "dur": 3.735, + "args": { + "External id": 986148,"Sequence number": 10552671, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940163624.353, "dur": 128307.417, + "args": { + "External id": 986149,"Sequence number": 10552671, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 19049 + } + }, + { + "ph": "s", "id": 248, "pid": 2338706, "tid": 2338706, "ts": 6345940163624.353, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345940163641.331, "dur": 30.160, + "args": { + "External id": 986150,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345940163642.152, "dur": 29.120, + "args": { + "External id": 986151,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940163643.605, "dur": 5.600, + "args": { + "External id": 986152,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940163644.985, "dur": 3.813, + "args": { + "External id": 986153,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940163649.988, "dur": 20.841, + "args": { + "External id": 986154,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 19054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940163691.998, "dur": 28.217, + "args": { + "External id": 986155,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940163693.362, "dur": 8.823, + "args": { + "External id": 986156,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163697.995, "dur": 3.770, + "args": { + "External id": 986157,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345940163703.105, "dur": 16.879, + "args": { + "External id": 986158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940163704.339, "dur": 15.146, + "args": { + "External id": 986159,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940163724.319, "dur": 19.196, + "args": { + "External id": 986160,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940163725.252, "dur": 5.420, + "args": { + "External id": 986161,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163726.552, "dur": 3.805, + "args": { + "External id": 986162,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345940163731.355, "dur": 11.933, + "args": { + "External id": 986163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940163731.786, "dur": 11.093, + "args": { + "External id": 986164,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 19064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2338706, + "ts": 6345940163748.725, "dur": 20.312, + "args": { + "External id": 986165,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940163749.805, "dur": 4.063, + "args": { + "External id": 986166,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345940163757.252, "dur": 11.445, + "args": { + "External id": 986167,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 19067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940163757.909, "dur": 10.394, + "args": { + "External id": 986168,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2338706, + "ts": 6345940163777.660, "dur": 33.138, + "args": { + "External id": 986169,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940163815.732, "dur": 67.972, + "args": { + "External id": 986170,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940163819.309, "dur": 63.774, + "args": { + "External id": 986171,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163826.144, "dur": 1.068, + "args": { + "External id": 986172,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345940163829.061, "dur": 29.277, + "args": { + "External id": 986173,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940163830.863, "dur": 27.233, + "args": { + "External id": 986174,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 19074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940163834.475, "dur": 5.907, + "args": { + "External id": 986175,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940163841.484, "dur": 16.195, + "args": { + "External id": 986176,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 19076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2338706, + "ts": 6345940163891.315, "dur": 121051.177, + "args": { + "External id": 986177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2338706, + "ts": 6345940163893.052, "dur": 121048.043, + "args": { + "External id": 986178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940284961.377, "dur": 12.668, + "args": { + "External id": 986179,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940284969.041, "dur": 1.566, + "args": { + "External id": 986180,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940284981.638, "dur": 184.928, + "args": { + "External id": 986181,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940284983.342, "dur": 11.464, + "args": { + "External id": 986182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940284986.910, "dur": 6.707, + "args": { + "External id": 986183,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940284992.286, "dur": 1.016, + "args": { + "External id": 986184,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940284997.457, "dur": 168.344, + "args": { + "External id": 986185,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940284999.529, "dur": 164.739, + "args": { + "External id": 986186,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940285175.582, "dur": 9.236, + "args": { + "External id": 986187,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940285180.804, "dur": 1.178, + "args": { + "External id": 986188,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940285197.370, "dur": 6.269, + "args": { + "External id": 986189,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940285223.212, "dur": 13.691, + "args": { + "External id": 986190,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940285229.458, "dur": 7.083, + "args": { + "External id": 986191,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940285437.456, "dur": 291.338, + "args": { + "External id": 986192,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940285444.566, "dur": 4.117, + "args": { + "External id": 986193,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940285451.245, "dur": 276.813, + "args": { + "External id": 986194,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940285455.223, "dur": 0.779, + "args": { + "External id": 986195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940285458.523, "dur": 41.522, + "args": { + "External id": 986196,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940285504.111, "dur": 5.692, + "args": { + "External id": 986197,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940285508.737, "dur": 0.709, + "args": { + "External id": 986198,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940285515.963, "dur": 36.621, + "args": { + "External id": 986199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940285517.787, "dur": 2.581, + "args": { + "External id": 986200,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940285522.936, "dur": 29.193, + "args": { + "External id": 986201,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940285529.392, "dur": 4.811, + "args": { + "External id": 986202,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940285555.622, "dur": 29.278, + "args": { + "External id": 986203,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940285588.290, "dur": 20.259, + "args": { + "External id": 986204,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940285612.708, "dur": 20.069, + "args": { + "External id": 986205,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940285636.000, "dur": 20.267, + "args": { + "External id": 986206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940285659.352, "dur": 28.208, + "args": { + "External id": 986207,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940285664.101, "dur": 1.571, + "args": { + "External id": 986208,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940285667.997, "dur": 0.662, + "args": { + "External id": 986209,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940285690.225, "dur": 18.649, + "args": { + "External id": 986210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940285711.037, "dur": 15.942, + "args": { + "External id": 986211,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940285737.536, "dur": 2.488, + "args": { + "External id": 986212,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940285749.535, "dur": 5.532, + "args": { + "External id": 986213,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940285753.505, "dur": 0.407, + "args": { + "External id": 986214,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940285856.454, "dur": 93.534, + "args": { + "External id": 986215,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940285957.812, "dur": 7.387, + "args": { + "External id": 986216,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940285961.486, "dur": 1.999, + "args": { + "External id": 986217,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940285967.287, "dur": 39.557, + "args": { + "External id": 986218,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940286034.740, "dur": 11.681, + "args": { + "External id": 986219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940286038.369, "dur": 7.089, + "args": { + "External id": 986220,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940286043.173, "dur": 1.779, + "args": { + "External id": 986221,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940286087.892, "dur": 83.270, + "args": { + "External id": 986222,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940286091.458, "dur": 78.731, + "args": { + "External id": 986223,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940286180.162, "dur": 24.993, + "args": { + "External id": 986224,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940286215.140, "dur": 6.786, + "args": { + "External id": 986225,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940286219.882, "dur": 0.752, + "args": { + "External id": 986226,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940286227.247, "dur": 60.251, + "args": { + "External id": 986227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940286230.711, "dur": 4.433, + "args": { + "External id": 986228,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940286231.955, "dur": 2.557, + "args": { + "External id": 986229,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940286233.601, "dur": 0.707, + "args": { + "External id": 986230,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940286235.812, "dur": 51.234, + "args": { + "External id": 986231,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940286236.712, "dur": 49.460, + "args": { + "External id": 986232,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940286293.028, "dur": 5.404, + "args": { + "External id": 986233,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940286295.281, "dur": 1.342, + "args": { + "External id": 986234,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940286308.576, "dur": 2.105, + "args": { + "External id": 986235,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940286322.634, "dur": 6.974, + "args": { + "External id": 986236,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940286324.759, "dur": 4.499, + "args": { + "External id": 986237,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940286457.746, "dur": 232.803, + "args": { + "External id": 986238,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940286461.397, "dur": 2.221, + "args": { + "External id": 986239,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940286465.779, "dur": 224.199, + "args": { + "External id": 986240,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940286469.532, "dur": 0.712, + "args": { + "External id": 986241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940286471.808, "dur": 28.571, + "args": { + "External id": 986242,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940286502.599, "dur": 5.910, + "args": { + "External id": 986243,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940286505.168, "dur": 3.025, + "args": { + "External id": 986244,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940286509.727, "dur": 28.659, + "args": { + "External id": 986245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940286511.173, "dur": 1.247, + "args": { + "External id": 986246,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940286514.170, "dur": 23.737, + "args": { + "External id": 986247,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940286517.633, "dur": 3.430, + "args": { + "External id": 986248,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940286540.262, "dur": 26.133, + "args": { + "External id": 986249,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940286568.953, "dur": 18.760, + "args": { + "External id": 986250,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940286593.178, "dur": 18.795, + "args": { + "External id": 986251,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940286613.822, "dur": 16.801, + "args": { + "External id": 986252,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940286632.787, "dur": 24.833, + "args": { + "External id": 986253,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940286635.111, "dur": 2.898, + "args": { + "External id": 986254,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940286640.336, "dur": 0.504, + "args": { + "External id": 986255,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940286659.543, "dur": 14.560, + "args": { + "External id": 986256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940286675.436, "dur": 13.215, + "args": { + "External id": 986257,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940286700.767, "dur": 2.060, + "args": { + "External id": 986258,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940286714.749, "dur": 5.546, + "args": { + "External id": 986259,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940286717.716, "dur": 1.616, + "args": { + "External id": 986260,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940286806.515, "dur": 65.000, + "args": { + "External id": 986261,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940286877.460, "dur": 5.817, + "args": { + "External id": 986262,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940286880.967, "dur": 0.945, + "args": { + "External id": 986263,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940286884.907, "dur": 28.843, + "args": { + "External id": 986264,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940286919.068, "dur": 7.633, + "args": { + "External id": 986265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940286920.691, "dur": 5.250, + "args": { + "External id": 986266,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940286924.808, "dur": 0.868, + "args": { + "External id": 986267,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940286929.523, "dur": 51.811, + "args": { + "External id": 986268,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940286930.530, "dur": 50.118, + "args": { + "External id": 986269,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940286986.062, "dur": 19.384, + "args": { + "External id": 986270,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940287032.928, "dur": 7.057, + "args": { + "External id": 986271,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940287037.754, "dur": 0.856, + "args": { + "External id": 986272,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940287047.943, "dur": 128.292, + "args": { + "External id": 986273,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940287087.322, "dur": 8.521, + "args": { + "External id": 986274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940287090.871, "dur": 4.004, + "args": { + "External id": 986275,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940287093.366, "dur": 1.105, + "args": { + "External id": 986276,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940287096.767, "dur": 78.894, + "args": { + "External id": 986277,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940287097.589, "dur": 77.019, + "args": { + "External id": 986278,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940287184.088, "dur": 7.504, + "args": { + "External id": 986279,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940287186.938, "dur": 2.915, + "args": { + "External id": 986280,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940287202.282, "dur": 2.060, + "args": { + "External id": 986281,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940287215.253, "dur": 6.896, + "args": { + "External id": 986282,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940287217.399, "dur": 4.461, + "args": { + "External id": 986283,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940287338.398, "dur": 230.065, + "args": { + "External id": 986284,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940287340.952, "dur": 2.086, + "args": { + "External id": 986285,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940287347.413, "dur": 220.501, + "args": { + "External id": 986286,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940287349.269, "dur": 0.480, + "args": { + "External id": 986287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940287350.905, "dur": 28.907, + "args": { + "External id": 986288,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940287381.616, "dur": 3.580, + "args": { + "External id": 986289,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940287383.873, "dur": 1.047, + "args": { + "External id": 986290,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940287386.229, "dur": 27.500, + "args": { + "External id": 986291,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940287387.730, "dur": 1.505, + "args": { + "External id": 986292,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940287390.783, "dur": 22.495, + "args": { + "External id": 986293,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940287394.636, "dur": 3.514, + "args": { + "External id": 986294,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940287415.435, "dur": 30.179, + "args": { + "External id": 986295,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940287447.444, "dur": 18.396, + "args": { + "External id": 986296,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940287469.105, "dur": 17.178, + "args": { + "External id": 986297,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940287487.750, "dur": 17.851, + "args": { + "External id": 986298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940287507.929, "dur": 24.741, + "args": { + "External id": 986299,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940287510.705, "dur": 1.744, + "args": { + "External id": 986300,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940287515.440, "dur": 0.840, + "args": { + "External id": 986301,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940287536.642, "dur": 15.048, + "args": { + "External id": 986302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940287552.916, "dur": 13.872, + "args": { + "External id": 986303,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940287576.152, "dur": 1.971, + "args": { + "External id": 986304,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940287588.898, "dur": 4.553, + "args": { + "External id": 986305,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940287591.699, "dur": 0.425, + "args": { + "External id": 986306,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940287674.781, "dur": 63.068, + "args": { + "External id": 986307,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940287743.859, "dur": 5.265, + "args": { + "External id": 986308,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940287747.344, "dur": 0.669, + "args": { + "External id": 986309,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940287752.857, "dur": 31.009, + "args": { + "External id": 986310,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940287789.250, "dur": 5.687, + "args": { + "External id": 986311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940287791.034, "dur": 3.084, + "args": { + "External id": 986312,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940287793.118, "dur": 0.756, + "args": { + "External id": 986313,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940287797.818, "dur": 48.336, + "args": { + "External id": 986314,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940287798.873, "dur": 46.318, + "args": { + "External id": 986315,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940287851.054, "dur": 18.130, + "args": { + "External id": 986316,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940287878.171, "dur": 3.772, + "args": { + "External id": 986317,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940287880.386, "dur": 0.516, + "args": { + "External id": 986318,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940287886.364, "dur": 51.640, + "args": { + "External id": 986319,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940287887.417, "dur": 3.906, + "args": { + "External id": 986320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940287888.093, "dur": 2.597, + "args": { + "External id": 986321,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940287890.000, "dur": 0.551, + "args": { + "External id": 986322,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940287893.991, "dur": 43.555, + "args": { + "External id": 986323,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940287894.672, "dur": 42.162, + "args": { + "External id": 986324,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940287942.864, "dur": 4.317, + "args": { + "External id": 986325,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940287945.189, "dur": 0.649, + "args": { + "External id": 986326,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940287953.653, "dur": 1.523, + "args": { + "External id": 986327,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940287963.724, "dur": 9.571, + "args": { + "External id": 986328,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940287967.964, "dur": 5.024, + "args": { + "External id": 986329,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940288137.998, "dur": 224.306, + "args": { + "External id": 986330,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940288141.398, "dur": 4.127, + "args": { + "External id": 986331,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940288147.310, "dur": 214.415, + "args": { + "External id": 986332,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940288149.003, "dur": 0.600, + "args": { + "External id": 986333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940288151.460, "dur": 28.355, + "args": { + "External id": 986334,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940288181.905, "dur": 5.133, + "args": { + "External id": 986335,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940288185.200, "dur": 1.458, + "args": { + "External id": 986336,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940288188.297, "dur": 27.443, + "args": { + "External id": 986337,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940288189.702, "dur": 1.377, + "args": { + "External id": 986338,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940288192.655, "dur": 22.758, + "args": { + "External id": 986339,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940288196.377, "dur": 2.561, + "args": { + "External id": 986340,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940288217.464, "dur": 24.833, + "args": { + "External id": 986341,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940288244.267, "dur": 18.775, + "args": { + "External id": 986342,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940288266.332, "dur": 16.837, + "args": { + "External id": 986343,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940288284.639, "dur": 16.804, + "args": { + "External id": 986344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940288304.159, "dur": 24.886, + "args": { + "External id": 986345,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940288306.939, "dur": 1.600, + "args": { + "External id": 986346,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940288310.750, "dur": 0.823, + "args": { + "External id": 986347,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940288330.560, "dur": 14.862, + "args": { + "External id": 986348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940288346.623, "dur": 13.782, + "args": { + "External id": 986349,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940288370.649, "dur": 2.530, + "args": { + "External id": 986350,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940288385.111, "dur": 4.887, + "args": { + "External id": 986351,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940288387.879, "dur": 1.063, + "args": { + "External id": 986352,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940288474.395, "dur": 71.333, + "args": { + "External id": 986353,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940288552.027, "dur": 5.652, + "args": { + "External id": 986354,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940288555.588, "dur": 0.771, + "args": { + "External id": 986355,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940288559.327, "dur": 31.034, + "args": { + "External id": 986356,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940288596.580, "dur": 6.113, + "args": { + "External id": 986357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940288598.525, "dur": 3.387, + "args": { + "External id": 986358,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940288600.779, "dur": 0.854, + "args": { + "External id": 986359,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940288605.620, "dur": 50.552, + "args": { + "External id": 986360,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940288606.780, "dur": 48.497, + "args": { + "External id": 986361,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940288660.748, "dur": 26.272, + "args": { + "External id": 986362,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940288693.900, "dur": 5.071, + "args": { + "External id": 986363,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940288697.057, "dur": 0.829, + "args": { + "External id": 986364,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940288703.592, "dur": 56.820, + "args": { + "External id": 986365,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940288707.064, "dur": 6.517, + "args": { + "External id": 986366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940288707.959, "dur": 4.954, + "args": { + "External id": 986367,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940288709.832, "dur": 2.880, + "args": { + "External id": 986368,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940288714.412, "dur": 45.593, + "args": { + "External id": 986369,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940288714.946, "dur": 44.342, + "args": { + "External id": 986370,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940288765.139, "dur": 4.451, + "args": { + "External id": 986371,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940288767.333, "dur": 0.798, + "args": { + "External id": 986372,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940288777.568, "dur": 1.615, + "args": { + "External id": 986373,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940288788.226, "dur": 6.453, + "args": { + "External id": 986374,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940288790.397, "dur": 3.946, + "args": { + "External id": 986375,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940288897.780, "dur": 276.321, + "args": { + "External id": 986376,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940288899.992, "dur": 2.382, + "args": { + "External id": 986377,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940288904.459, "dur": 269.103, + "args": { + "External id": 986378,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940288907.755, "dur": 0.419, + "args": { + "External id": 986379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940288909.605, "dur": 24.408, + "args": { + "External id": 986380,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940288936.812, "dur": 3.685, + "args": { + "External id": 986381,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940288939.386, "dur": 0.838, + "args": { + "External id": 986382,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940288941.872, "dur": 26.837, + "args": { + "External id": 986383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940288943.482, "dur": 1.443, + "args": { + "External id": 986384,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940288946.382, "dur": 21.888, + "args": { + "External id": 986385,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940288949.581, "dur": 3.435, + "args": { + "External id": 986386,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940288970.400, "dur": 23.759, + "args": { + "External id": 986387,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940288995.999, "dur": 37.585, + "args": { + "External id": 986388,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940289038.851, "dur": 56.008, + "args": { + "External id": 986389,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940289098.259, "dur": 17.260, + "args": { + "External id": 986390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940289118.488, "dur": 24.976, + "args": { + "External id": 986391,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940289121.340, "dur": 2.368, + "args": { + "External id": 986392,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940289126.441, "dur": 0.967, + "args": { + "External id": 986393,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940289145.275, "dur": 13.086, + "args": { + "External id": 986394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940289159.688, "dur": 12.412, + "args": { + "External id": 986395,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940289183.685, "dur": 3.023, + "args": { + "External id": 986396,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940289199.785, "dur": 5.010, + "args": { + "External id": 986397,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940289202.981, "dur": 0.661, + "args": { + "External id": 986398,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940289289.682, "dur": 67.625, + "args": { + "External id": 986399,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940289363.124, "dur": 5.841, + "args": { + "External id": 986400,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940289366.651, "dur": 0.813, + "args": { + "External id": 986401,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940289370.565, "dur": 26.455, + "args": { + "External id": 986402,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940289402.839, "dur": 48.636, + "args": { + "External id": 986403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940289404.763, "dur": 45.637, + "args": { + "External id": 986404,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940289449.000, "dur": 0.968, + "args": { + "External id": 986405,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940289455.161, "dur": 49.113, + "args": { + "External id": 986406,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940289456.202, "dur": 47.380, + "args": { + "External id": 986407,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940289509.469, "dur": 17.630, + "args": { + "External id": 986408,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940289533.994, "dur": 5.215, + "args": { + "External id": 986409,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940289537.089, "dur": 0.985, + "args": { + "External id": 986410,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940289545.761, "dur": 48.813, + "args": { + "External id": 986411,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940289546.626, "dur": 4.585, + "args": { + "External id": 986412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940289547.673, "dur": 2.838, + "args": { + "External id": 986413,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940289549.569, "dur": 0.768, + "args": { + "External id": 986414,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940289551.882, "dur": 42.327, + "args": { + "External id": 986415,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940289552.603, "dur": 41.112, + "args": { + "External id": 986416,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940289599.468, "dur": 6.734, + "args": { + "External id": 986417,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940289601.788, "dur": 2.839, + "args": { + "External id": 986418,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940289615.539, "dur": 1.900, + "args": { + "External id": 986419,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940289626.960, "dur": 7.416, + "args": { + "External id": 986420,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940289629.624, "dur": 4.425, + "args": { + "External id": 986421,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940289737.170, "dur": 223.472, + "args": { + "External id": 986422,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940289739.883, "dur": 2.531, + "args": { + "External id": 986423,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940289746.139, "dur": 213.906, + "args": { + "External id": 986424,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940289747.729, "dur": 0.589, + "args": { + "External id": 986425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940289749.842, "dur": 25.397, + "args": { + "External id": 986426,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940289777.233, "dur": 4.638, + "args": { + "External id": 986427,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940289780.458, "dur": 1.102, + "args": { + "External id": 986428,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940289782.916, "dur": 27.932, + "args": { + "External id": 986429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940289784.544, "dur": 1.923, + "args": { + "External id": 986430,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940289788.244, "dur": 22.279, + "args": { + "External id": 986431,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940289791.939, "dur": 2.704, + "args": { + "External id": 986432,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940289814.619, "dur": 26.962, + "args": { + "External id": 986433,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940289843.289, "dur": 18.653, + "args": { + "External id": 986434,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940289865.539, "dur": 16.771, + "args": { + "External id": 986435,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940289883.858, "dur": 16.372, + "args": { + "External id": 986436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940289902.649, "dur": 24.332, + "args": { + "External id": 986437,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940289905.210, "dur": 1.887, + "args": { + "External id": 986438,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940289909.718, "dur": 0.724, + "args": { + "External id": 986439,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940289928.687, "dur": 15.048, + "args": { + "External id": 986440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940289945.207, "dur": 13.577, + "args": { + "External id": 986441,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940289967.622, "dur": 1.731, + "args": { + "External id": 986442,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940289978.910, "dur": 4.665, + "args": { + "External id": 986443,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940289981.907, "dur": 0.608, + "args": { + "External id": 986444,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940290123.407, "dur": 70.856, + "args": { + "External id": 986445,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940290201.156, "dur": 8.322, + "args": { + "External id": 986446,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940290206.234, "dur": 1.388, + "args": { + "External id": 986447,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940290211.245, "dur": 37.928, + "args": { + "External id": 986448,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940290256.025, "dur": 6.325, + "args": { + "External id": 986449,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940290257.780, "dur": 3.733, + "args": { + "External id": 986450,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940290260.491, "dur": 0.787, + "args": { + "External id": 986451,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940290265.372, "dur": 49.247, + "args": { + "External id": 986452,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940290267.232, "dur": 46.585, + "args": { + "External id": 986453,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940290319.693, "dur": 18.951, + "args": { + "External id": 986454,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940290345.620, "dur": 5.715, + "args": { + "External id": 986455,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940290349.295, "dur": 1.064, + "args": { + "External id": 986456,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940290356.162, "dur": 52.116, + "args": { + "External id": 986457,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940290359.721, "dur": 4.777, + "args": { + "External id": 986458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940290360.549, "dur": 3.270, + "args": { + "External id": 986459,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940290362.839, "dur": 0.786, + "args": { + "External id": 986460,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940290365.540, "dur": 42.364, + "args": { + "External id": 986461,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940290366.494, "dur": 40.515, + "args": { + "External id": 986462,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940290412.393, "dur": 7.924, + "args": { + "External id": 986463,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940290415.206, "dur": 3.539, + "args": { + "External id": 986464,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940290429.935, "dur": 1.870, + "args": { + "External id": 986465,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940290440.876, "dur": 6.973, + "args": { + "External id": 986466,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940290443.321, "dur": 4.232, + "args": { + "External id": 986467,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940290543.880, "dur": 219.465, + "args": { + "External id": 986468,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940290546.387, "dur": 2.001, + "args": { + "External id": 986469,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940290550.225, "dur": 212.530, + "args": { + "External id": 986470,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940290553.770, "dur": 0.684, + "args": { + "External id": 986471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940290562.019, "dur": 24.763, + "args": { + "External id": 986472,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940290589.034, "dur": 4.098, + "args": { + "External id": 986473,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940290591.819, "dur": 1.015, + "args": { + "External id": 986474,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940290594.788, "dur": 25.737, + "args": { + "External id": 986475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940290596.434, "dur": 1.396, + "args": { + "External id": 986476,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940290599.473, "dur": 20.681, + "args": { + "External id": 986477,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940290603.316, "dur": 2.986, + "args": { + "External id": 986478,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940290622.699, "dur": 25.601, + "args": { + "External id": 986479,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940290650.140, "dur": 16.019, + "args": { + "External id": 986480,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940290669.511, "dur": 16.716, + "args": { + "External id": 986481,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940290687.855, "dur": 13.827, + "args": { + "External id": 986482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940290704.319, "dur": 23.510, + "args": { + "External id": 986483,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940290707.220, "dur": 1.559, + "args": { + "External id": 986484,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940290711.729, "dur": 0.677, + "args": { + "External id": 986485,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940290729.539, "dur": 12.880, + "args": { + "External id": 986486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940290743.976, "dur": 17.670, + "args": { + "External id": 986487,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940290770.551, "dur": 2.088, + "args": { + "External id": 986488,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940290782.718, "dur": 4.466, + "args": { + "External id": 986489,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940290785.586, "dur": 0.603, + "args": { + "External id": 986490,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940290856.522, "dur": 53.421, + "args": { + "External id": 986491,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940290915.147, "dur": 5.743, + "args": { + "External id": 986492,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940290918.507, "dur": 1.093, + "args": { + "External id": 986493,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940290922.466, "dur": 23.589, + "args": { + "External id": 986494,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940290950.677, "dur": 6.338, + "args": { + "External id": 986495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940290952.185, "dur": 4.066, + "args": { + "External id": 986496,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940290955.363, "dur": 0.697, + "args": { + "External id": 986497,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940290959.297, "dur": 44.070, + "args": { + "External id": 986498,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940290960.612, "dur": 42.043, + "args": { + "External id": 986499,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940291026.604, "dur": 23.373, + "args": { + "External id": 986500,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940291096.118, "dur": 6.937, + "args": { + "External id": 986501,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940291100.510, "dur": 0.937, + "args": { + "External id": 986502,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940291109.206, "dur": 63.520, + "args": { + "External id": 986503,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940291112.731, "dur": 5.179, + "args": { + "External id": 986504,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940291114.327, "dur": 2.856, + "args": { + "External id": 986505,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940291116.431, "dur": 0.574, + "args": { + "External id": 986506,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940291118.800, "dur": 53.494, + "args": { + "External id": 986507,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940291119.687, "dur": 51.839, + "args": { + "External id": 986508,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940291177.151, "dur": 7.503, + "args": { + "External id": 986509,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940291179.513, "dur": 3.577, + "args": { + "External id": 986510,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940291195.704, "dur": 1.986, + "args": { + "External id": 986511,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940291206.855, "dur": 7.038, + "args": { + "External id": 986512,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940291209.309, "dur": 4.269, + "args": { + "External id": 986513,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940291315.239, "dur": 212.246, + "args": { + "External id": 986514,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940291317.879, "dur": 2.443, + "args": { + "External id": 986515,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940291321.933, "dur": 204.845, + "args": { + "External id": 986516,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940291325.837, "dur": 0.533, + "args": { + "External id": 986517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940291328.249, "dur": 25.952, + "args": { + "External id": 986518,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940291356.239, "dur": 4.476, + "args": { + "External id": 986519,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940291359.526, "dur": 0.797, + "args": { + "External id": 986520,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940291361.870, "dur": 24.569, + "args": { + "External id": 986521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940291363.170, "dur": 1.563, + "args": { + "External id": 986522,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940291366.400, "dur": 19.681, + "args": { + "External id": 986523,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940291370.027, "dur": 2.291, + "args": { + "External id": 986524,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940291387.941, "dur": 27.443, + "args": { + "External id": 986525,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940291420.755, "dur": 17.946, + "args": { + "External id": 986526,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940291441.580, "dur": 14.927, + "args": { + "External id": 986527,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940291457.978, "dur": 15.411, + "args": { + "External id": 986528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940291475.700, "dur": 23.146, + "args": { + "External id": 986529,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940291478.348, "dur": 1.685, + "args": { + "External id": 986530,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940291482.888, "dur": 0.936, + "args": { + "External id": 986531,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940291500.155, "dur": 12.941, + "args": { + "External id": 986532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940291514.535, "dur": 11.004, + "args": { + "External id": 986533,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940291534.427, "dur": 1.840, + "args": { + "External id": 986534,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940291546.141, "dur": 4.318, + "args": { + "External id": 986535,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940291548.890, "dur": 0.560, + "args": { + "External id": 986536,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940291622.119, "dur": 58.292, + "args": { + "External id": 986537,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940291686.458, "dur": 5.508, + "args": { + "External id": 986538,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940291690.010, "dur": 0.870, + "args": { + "External id": 986539,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940291693.480, "dur": 28.846, + "args": { + "External id": 986540,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940291727.602, "dur": 5.550, + "args": { + "External id": 986541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940291729.403, "dur": 2.885, + "args": { + "External id": 986542,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940291731.408, "dur": 0.691, + "args": { + "External id": 986543,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940291738.288, "dur": 46.779, + "args": { + "External id": 986544,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940291739.472, "dur": 44.830, + "args": { + "External id": 986545,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940291789.766, "dur": 15.535, + "args": { + "External id": 986546,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940291811.242, "dur": 29.047, + "args": { + "External id": 986547,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940291813.957, "dur": 25.922, + "args": { + "External id": 986548,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940291819.435, "dur": 1.227, + "args": { + "External id": 986549,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345940291846.787, "dur": 34.337, + "args": { + "External id": 986550,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940291849.112, "dur": 31.777, + "args": { + "External id": 986551,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 19451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940291855.811, "dur": 4.834, + "args": { + "External id": 986552,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940291862.046, "dur": 18.248, + "args": { + "External id": 986553,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2338706, + "ts": 6345940291897.817, "dur": 5.963, + "args": { + "External id": 986554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2338706, + "ts": 6345940291899.926, "dur": 3.540, + "args": { + "External id": 986555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2338706, + "ts": 6345940291905.157, "dur": 1.339, + "args": { + "External id": 986556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2338706, + "ts": 6345940291905.737, "dur": 0.633, + "args": { + "External id": 986557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940291952.385, "dur": 27.131, + "args": { + "External id": 986558,"Sequence number": 10552672, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 19458 + } + }, + { + "ph": "s", "id": 247, "pid": 2338706, "tid": 2338706, "ts": 6345940291952.385, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940291986.202, "dur": 7.129, + "args": { + "External id": 986559,"Sequence number": 10552673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 19459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940291990.534, "dur": 1.178, + "args": { + "External id": 986560,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2338706, + "ts": 6345940291996.051, "dur": 7.386, + "args": { + "External id": 986561,"Sequence number": 10552673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 19461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940292001.452, "dur": 0.544, + "args": { + "External id": 986562,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940292027.825, "dur": 6.397, + "args": { + "External id": 986563,"Sequence number": 10552673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 19463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940292032.190, "dur": 0.785, + "args": { + "External id": 986564,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 19464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940292041.273, "dur": 6.892, + "args": { + "External id": 986565,"Sequence number": 10552673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19465 + } + }, + { + "ph": "s", "id": 246, "pid": 2338706, "tid": 2338706, "ts": 6345940292041.273, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940292045.094, "dur": 1.096, + "args": { + "External id": 986566,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940292049.352, "dur": 48.584, + "args": { + "External id": 986567,"Sequence number": 10552674, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19467 + } + }, + { + "ph": "s", "id": 245, "pid": 2338706, "tid": 2338706, "ts": 6345940292049.352, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940292093.691, "dur": 0.855, + "args": { + "External id": 986568,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2338706, + "ts": 6345940292099.921, "dur": 7.617, + "args": { + "External id": 986569,"Sequence number": 10552675, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 19469 + } + }, + { + "ph": "s", "id": 244, "pid": 2338706, "tid": 2338706, "ts": 6345940292099.921, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940292105.263, "dur": 1.173, + "args": { + "External id": 986570,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940292109.237, "dur": 7.332, + "args": { + "External id": 986571,"Sequence number": 10552676, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 19471 + } + }, + { + "ph": "s", "id": 243, "pid": 2338706, "tid": 2338706, "ts": 6345940292109.237, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940292112.582, "dur": 3.053, + "args": { + "External id": 986572,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 19472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345940292122.135, "dur": 42.399, + "args": { + "External id": 986573,"Sequence number": 10552677, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345940292123.995, "dur": 40.302, + "args": { + "External id": 986574,"Sequence number": 10552677, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940292127.180, "dur": 10.826, + "args": { + "External id": 986575,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 19475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940292132.032, "dur": 5.299, + "args": { + "External id": 986576,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940292139.712, "dur": 24.084, + "args": { + "External id": 986577,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 19477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940292195.433, "dur": 4.462, + "args": { + "External id": 986578,"Sequence number": 10552677, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 19478 + } + }, + { + "ph": "s", "id": 242, "pid": 2338706, "tid": 2338706, "ts": 6345940292195.433, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940292202.790, "dur": 1.030, + "args": { + "External id": 986579,"Sequence number": 10552678, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940292241.098, "dur": 44929.830, + "args": { + "External id": 986580,"Sequence number": 10552678, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 19480 + } + }, + { + "ph": "s", "id": 241, "pid": 2338706, "tid": 2338706, "ts": 6345940292241.098, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345940292260.282, "dur": 31.559, + "args": { + "External id": 986581,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345940292263.994, "dur": 27.640, + "args": { + "External id": 986582,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940292265.540, "dur": 5.862, + "args": { + "External id": 986583,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940292267.197, "dur": 3.730, + "args": { + "External id": 986584,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940292272.707, "dur": 18.550, + "args": { + "External id": 986585,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 19485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940292310.799, "dur": 27.450, + "args": { + "External id": 986586,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940292312.243, "dur": 6.857, + "args": { + "External id": 986587,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940292314.734, "dur": 3.964, + "args": { + "External id": 986588,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345940292320.711, "dur": 17.280, + "args": { + "External id": 986589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940292322.811, "dur": 14.706, + "args": { + "External id": 986590,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940292342.064, "dur": 25.136, + "args": { + "External id": 986591,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940292343.171, "dur": 5.231, + "args": { + "External id": 986592,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940292344.729, "dur": 3.403, + "args": { + "External id": 986593,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345940292351.353, "dur": 15.614, + "args": { + "External id": 986594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940292351.891, "dur": 14.735, + "args": { + "External id": 986595,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 19495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2338706, + "ts": 6345940292374.871, "dur": 19.990, + "args": { + "External id": 986596,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940292376.769, "dur": 3.392, + "args": { + "External id": 986597,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345940292380.962, "dur": 13.501, + "args": { + "External id": 986598,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 19498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940292381.946, "dur": 12.191, + "args": { + "External id": 986599,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2338706, + "ts": 6345940292400.672, "dur": 29.455, + "args": { + "External id": 986600,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940292433.491, "dur": 61.327, + "args": { + "External id": 986601,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940292438.562, "dur": 55.751, + "args": { + "External id": 986602,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940292443.200, "dur": 1.008, + "args": { + "External id": 986603,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345940292445.912, "dur": 30.225, + "args": { + "External id": 986604,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940292447.971, "dur": 27.891, + "args": { + "External id": 986605,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 19505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940292451.371, "dur": 3.939, + "args": { + "External id": 986606,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940292456.415, "dur": 18.970, + "args": { + "External id": 986607,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 19507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2338706, + "ts": 6345940292499.574, "dur": 37804.999, + "args": { + "External id": 986608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2338706, + "ts": 6345940292501.084, "dur": 37802.469, + "args": { + "External id": 986609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940330319.110, "dur": 10.406, + "args": { + "External id": 986610,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940330325.078, "dur": 1.293, + "args": { + "External id": 986611,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940330336.574, "dur": 135.642, + "args": { + "External id": 986612,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940330338.792, "dur": 10.772, + "args": { + "External id": 986613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940330342.474, "dur": 6.093, + "args": { + "External id": 986614,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940330347.546, "dur": 0.702, + "args": { + "External id": 986615,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940330351.156, "dur": 120.118, + "args": { + "External id": 986616,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940330354.192, "dur": 116.015, + "args": { + "External id": 986617,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940330477.787, "dur": 6.072, + "args": { + "External id": 986618,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940330480.521, "dur": 1.103, + "args": { + "External id": 986619,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940330492.258, "dur": 3.847, + "args": { + "External id": 986620,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940330509.128, "dur": 12.398, + "args": { + "External id": 986621,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940330515.267, "dur": 5.887, + "args": { + "External id": 986622,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940330709.212, "dur": 427.504, + "args": { + "External id": 986623,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940330712.901, "dur": 5.600, + "args": { + "External id": 986624,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940330809.530, "dur": 326.408, + "args": { + "External id": 986625,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940330813.226, "dur": 0.556, + "args": { + "External id": 986626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940330815.524, "dur": 38.138, + "args": { + "External id": 986627,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940330856.022, "dur": 5.981, + "args": { + "External id": 986628,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940330860.696, "dur": 0.905, + "args": { + "External id": 986629,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940330864.306, "dur": 32.836, + "args": { + "External id": 986630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940330865.830, "dur": 1.464, + "args": { + "External id": 986631,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940330869.575, "dur": 26.935, + "args": { + "External id": 986632,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940330874.218, "dur": 4.839, + "args": { + "External id": 986633,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940330899.062, "dur": 30.166, + "args": { + "External id": 986634,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940330932.450, "dur": 20.680, + "args": { + "External id": 986635,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940330957.921, "dur": 21.384, + "args": { + "External id": 986636,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940330982.468, "dur": 20.237, + "args": { + "External id": 986637,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940331004.984, "dur": 86.602, + "args": { + "External id": 986638,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940331024.175, "dur": 4.418, + "args": { + "External id": 986639,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940331032.862, "dur": 0.808, + "args": { + "External id": 986640,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940331096.062, "dur": 19.802, + "args": { + "External id": 986641,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940331118.727, "dur": 15.623, + "args": { + "External id": 986642,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940331148.396, "dur": 3.386, + "args": { + "External id": 986643,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940331161.258, "dur": 6.076, + "args": { + "External id": 986644,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940331165.622, "dur": 0.653, + "args": { + "External id": 986645,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940331272.470, "dur": 95.888, + "args": { + "External id": 986646,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940331376.185, "dur": 8.359, + "args": { + "External id": 986647,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940331380.546, "dur": 1.277, + "args": { + "External id": 986648,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940331386.604, "dur": 32.716, + "args": { + "External id": 986649,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940331426.360, "dur": 13.871, + "args": { + "External id": 986650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940331431.870, "dur": 7.413, + "args": { + "External id": 986651,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940331434.575, "dur": 4.370, + "args": { + "External id": 986652,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940331444.181, "dur": 57.014, + "args": { + "External id": 986653,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940331445.471, "dur": 54.715, + "args": { + "External id": 986654,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940331507.062, "dur": 22.451, + "args": { + "External id": 986655,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940331536.853, "dur": 6.830, + "args": { + "External id": 986656,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940331542.033, "dur": 0.549, + "args": { + "External id": 986657,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940331550.062, "dur": 56.389, + "args": { + "External id": 986658,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940331551.425, "dur": 5.341, + "args": { + "External id": 986659,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940331552.364, "dur": 3.651, + "args": { + "External id": 986660,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940331555.355, "dur": 0.472, + "args": { + "External id": 986661,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940331557.714, "dur": 48.287, + "args": { + "External id": 986662,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940331558.445, "dur": 46.707, + "args": { + "External id": 986663,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940331613.500, "dur": 4.748, + "args": { + "External id": 986664,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940331615.406, "dur": 0.441, + "args": { + "External id": 986665,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940331625.244, "dur": 1.885, + "args": { + "External id": 986666,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940331637.992, "dur": 12.310, + "args": { + "External id": 986667,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940331644.066, "dur": 5.756, + "args": { + "External id": 986668,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940331775.265, "dur": 260.047, + "args": { + "External id": 986669,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940331778.538, "dur": 3.141, + "args": { + "External id": 986670,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940331783.528, "dur": 251.111, + "args": { + "External id": 986671,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940331784.844, "dur": 0.454, + "args": { + "External id": 986672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940331787.151, "dur": 27.745, + "args": { + "External id": 986673,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940331817.289, "dur": 4.536, + "args": { + "External id": 986674,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940331820.406, "dur": 1.058, + "args": { + "External id": 986675,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940331822.958, "dur": 30.222, + "args": { + "External id": 986676,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940331824.483, "dur": 3.478, + "args": { + "External id": 986677,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940331829.558, "dur": 23.152, + "args": { + "External id": 986678,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940331833.036, "dur": 2.659, + "args": { + "External id": 986679,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940331855.105, "dur": 26.247, + "args": { + "External id": 986680,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940331883.125, "dur": 20.766, + "args": { + "External id": 986681,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940331907.331, "dur": 18.167, + "args": { + "External id": 986682,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940331927.298, "dur": 23.484, + "args": { + "External id": 986683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940331952.803, "dur": 26.788, + "args": { + "External id": 986684,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940331955.412, "dur": 2.174, + "args": { + "External id": 986685,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940331960.739, "dur": 1.013, + "args": { + "External id": 986686,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940331981.557, "dur": 15.136, + "args": { + "External id": 986687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940331997.909, "dur": 34.421, + "args": { + "External id": 986688,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940332045.606, "dur": 3.139, + "args": { + "External id": 986689,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940332100.321, "dur": 7.170, + "args": { + "External id": 986690,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940332105.269, "dur": 0.747, + "args": { + "External id": 986691,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940332202.954, "dur": 78.189, + "args": { + "External id": 986692,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940332287.460, "dur": 5.740, + "args": { + "External id": 986693,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940332290.860, "dur": 0.926, + "args": { + "External id": 986694,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940332294.995, "dur": 32.665, + "args": { + "External id": 986695,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940332333.052, "dur": 7.156, + "args": { + "External id": 986696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940332334.976, "dur": 4.437, + "args": { + "External id": 986697,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940332338.169, "dur": 0.973, + "args": { + "External id": 986698,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940332343.419, "dur": 78.390, + "args": { + "External id": 986699,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940332347.341, "dur": 73.493, + "args": { + "External id": 986700,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940332428.160, "dur": 33.972, + "args": { + "External id": 986701,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940332469.500, "dur": 4.675, + "args": { + "External id": 986702,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940332472.229, "dur": 0.899, + "args": { + "External id": 986703,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940332478.693, "dur": 55.488, + "args": { + "External id": 986704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940332479.836, "dur": 6.621, + "args": { + "External id": 986705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940332480.933, "dur": 4.894, + "args": { + "External id": 986706,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940332485.051, "dur": 0.583, + "args": { + "External id": 986707,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940332487.037, "dur": 46.550, + "args": { + "External id": 986708,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940332487.820, "dur": 44.936, + "args": { + "External id": 986709,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940332539.376, "dur": 6.783, + "args": { + "External id": 986710,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940332541.713, "dur": 3.064, + "args": { + "External id": 986711,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940332553.772, "dur": 1.762, + "args": { + "External id": 986712,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940332567.073, "dur": 7.307, + "args": { + "External id": 986713,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940332569.806, "dur": 4.268, + "args": { + "External id": 986714,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940332679.347, "dur": 208.202, + "args": { + "External id": 986715,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940332682.273, "dur": 3.214, + "args": { + "External id": 986716,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940332686.992, "dur": 199.949, + "args": { + "External id": 986717,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940332688.245, "dur": 0.457, + "args": { + "External id": 986718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940332690.162, "dur": 25.939, + "args": { + "External id": 986719,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940332718.214, "dur": 3.860, + "args": { + "External id": 986720,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940332720.885, "dur": 0.906, + "args": { + "External id": 986721,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940332723.169, "dur": 27.476, + "args": { + "External id": 986722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940332724.471, "dur": 1.500, + "args": { + "External id": 986723,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940332727.615, "dur": 22.659, + "args": { + "External id": 986724,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940332733.828, "dur": 3.391, + "args": { + "External id": 986725,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940332752.234, "dur": 28.181, + "args": { + "External id": 986726,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940332782.128, "dur": 16.583, + "args": { + "External id": 986727,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940332802.158, "dur": 15.487, + "args": { + "External id": 986728,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940332819.164, "dur": 14.329, + "args": { + "External id": 986729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940332835.535, "dur": 21.679, + "args": { + "External id": 986730,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940332838.063, "dur": 1.673, + "args": { + "External id": 986731,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940332842.176, "dur": 0.663, + "args": { + "External id": 986732,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940332859.708, "dur": 13.161, + "args": { + "External id": 986733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940332874.328, "dur": 11.570, + "args": { + "External id": 986734,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940332894.644, "dur": 2.033, + "args": { + "External id": 986735,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940332907.622, "dur": 5.412, + "args": { + "External id": 986736,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940332910.233, "dur": 0.642, + "args": { + "External id": 986737,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940332988.870, "dur": 123.704, + "args": { + "External id": 986738,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940333122.055, "dur": 9.582, + "args": { + "External id": 986739,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940333128.593, "dur": 1.305, + "args": { + "External id": 986740,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940333133.474, "dur": 35.644, + "args": { + "External id": 986741,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940333175.357, "dur": 5.690, + "args": { + "External id": 986742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940333177.089, "dur": 3.246, + "args": { + "External id": 986743,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940333179.271, "dur": 0.779, + "args": { + "External id": 986744,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940333185.018, "dur": 54.142, + "args": { + "External id": 986745,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940333186.242, "dur": 51.982, + "args": { + "External id": 986746,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940333246.636, "dur": 19.546, + "args": { + "External id": 986747,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940333273.628, "dur": 4.295, + "args": { + "External id": 986748,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940333276.403, "dur": 0.569, + "args": { + "External id": 986749,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940333282.582, "dur": 53.389, + "args": { + "External id": 986750,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940333283.833, "dur": 6.010, + "args": { + "External id": 986751,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940333284.545, "dur": 4.607, + "args": { + "External id": 986752,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940333287.954, "dur": 1.043, + "args": { + "External id": 986753,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940333290.458, "dur": 45.151, + "args": { + "External id": 986754,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940333291.135, "dur": 43.728, + "args": { + "External id": 986755,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940333341.103, "dur": 4.100, + "args": { + "External id": 986756,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940333343.267, "dur": 0.546, + "args": { + "External id": 986757,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940333352.358, "dur": 1.875, + "args": { + "External id": 986758,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940333363.459, "dur": 8.004, + "args": { + "External id": 986759,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940333367.298, "dur": 3.881, + "args": { + "External id": 986760,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940333476.219, "dur": 206.161, + "args": { + "External id": 986761,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940333478.665, "dur": 4.239, + "args": { + "External id": 986762,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940333484.557, "dur": 197.180, + "args": { + "External id": 986763,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940333485.990, "dur": 0.323, + "args": { + "External id": 986764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940333487.572, "dur": 26.298, + "args": { + "External id": 986765,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940333516.035, "dur": 4.184, + "args": { + "External id": 986766,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940333519.155, "dur": 0.795, + "args": { + "External id": 986767,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940333521.343, "dur": 26.220, + "args": { + "External id": 986768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940333523.363, "dur": 1.448, + "args": { + "External id": 986769,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940333526.547, "dur": 20.667, + "args": { + "External id": 986770,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940333529.897, "dur": 2.520, + "args": { + "External id": 986771,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940333549.085, "dur": 22.515, + "args": { + "External id": 986772,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940333573.418, "dur": 16.373, + "args": { + "External id": 986773,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940333592.681, "dur": 15.489, + "args": { + "External id": 986774,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940333609.708, "dur": 14.349, + "args": { + "External id": 986775,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940333626.509, "dur": 26.701, + "args": { + "External id": 986776,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940333629.397, "dur": 1.848, + "args": { + "External id": 986777,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940333637.698, "dur": 0.789, + "args": { + "External id": 986778,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940333654.984, "dur": 12.487, + "args": { + "External id": 986779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940333668.849, "dur": 11.562, + "args": { + "External id": 986780,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940333690.011, "dur": 1.759, + "args": { + "External id": 986781,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940333701.267, "dur": 3.966, + "args": { + "External id": 986782,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940333703.771, "dur": 0.484, + "args": { + "External id": 986783,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940333778.664, "dur": 56.322, + "args": { + "External id": 986784,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940333842.800, "dur": 5.567, + "args": { + "External id": 986785,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940333846.362, "dur": 0.766, + "args": { + "External id": 986786,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940333849.883, "dur": 25.624, + "args": { + "External id": 986787,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940333880.491, "dur": 5.230, + "args": { + "External id": 986788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940333882.094, "dur": 2.973, + "args": { + "External id": 986789,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940333884.074, "dur": 0.682, + "args": { + "External id": 986790,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940333890.952, "dur": 47.447, + "args": { + "External id": 986791,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940333891.857, "dur": 45.839, + "args": { + "External id": 986792,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940333942.993, "dur": 17.144, + "args": { + "External id": 986793,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940333967.203, "dur": 4.056, + "args": { + "External id": 986794,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940333969.926, "dur": 0.450, + "args": { + "External id": 986795,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940333975.158, "dur": 113.485, + "args": { + "External id": 986796,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940333976.125, "dur": 5.253, + "args": { + "External id": 986797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940333978.738, "dur": 1.987, + "args": { + "External id": 986798,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940333980.071, "dur": 0.498, + "args": { + "External id": 986799,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940333982.178, "dur": 105.358, + "args": { + "External id": 986800,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940333983.046, "dur": 67.759, + "args": { + "External id": 986801,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940334098.332, "dur": 5.483, + "args": { + "External id": 986802,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940334101.648, "dur": 0.730, + "args": { + "External id": 986803,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940334111.888, "dur": 1.918, + "args": { + "External id": 986804,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940334125.365, "dur": 9.486, + "args": { + "External id": 986805,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940334127.485, "dur": 7.062, + "args": { + "External id": 986806,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940334242.460, "dur": 207.649, + "args": { + "External id": 986807,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940334245.023, "dur": 2.376, + "args": { + "External id": 986808,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940334249.386, "dur": 200.311, + "args": { + "External id": 986809,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940334250.800, "dur": 0.333, + "args": { + "External id": 986810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940334253.858, "dur": 26.609, + "args": { + "External id": 986811,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940334282.544, "dur": 4.379, + "args": { + "External id": 986812,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940334285.790, "dur": 0.827, + "args": { + "External id": 986813,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940334287.864, "dur": 25.641, + "args": { + "External id": 986814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940334289.435, "dur": 1.378, + "args": { + "External id": 986815,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940334292.562, "dur": 20.510, + "args": { + "External id": 986816,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940334295.944, "dur": 3.600, + "args": { + "External id": 986817,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940334315.151, "dur": 22.554, + "args": { + "External id": 986818,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940334339.226, "dur": 14.947, + "args": { + "External id": 986819,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940334357.402, "dur": 16.359, + "args": { + "External id": 986820,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940334375.122, "dur": 16.674, + "args": { + "External id": 986821,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940334394.072, "dur": 23.199, + "args": { + "External id": 986822,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940334396.626, "dur": 1.620, + "args": { + "External id": 986823,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940334400.677, "dur": 0.620, + "args": { + "External id": 986824,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940334418.946, "dur": 14.668, + "args": { + "External id": 986825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940334434.933, "dur": 13.587, + "args": { + "External id": 986826,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940334457.609, "dur": 1.964, + "args": { + "External id": 986827,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940334470.401, "dur": 4.673, + "args": { + "External id": 986828,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940334473.300, "dur": 0.600, + "args": { + "External id": 986829,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940334549.444, "dur": 71.227, + "args": { + "External id": 986830,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940334627.377, "dur": 5.269, + "args": { + "External id": 986831,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940334630.467, "dur": 0.802, + "args": { + "External id": 986832,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940334634.506, "dur": 27.511, + "args": { + "External id": 986833,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940334667.111, "dur": 6.288, + "args": { + "External id": 986834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940334668.970, "dur": 3.677, + "args": { + "External id": 986835,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940334671.729, "dur": 0.660, + "args": { + "External id": 986836,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940334676.120, "dur": 47.398, + "args": { + "External id": 986837,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940334677.242, "dur": 45.430, + "args": { + "External id": 986838,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940334728.343, "dur": 17.478, + "args": { + "External id": 986839,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940334754.887, "dur": 4.120, + "args": { + "External id": 986840,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940334757.318, "dur": 0.628, + "args": { + "External id": 986841,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940334763.432, "dur": 55.621, + "args": { + "External id": 986842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940334764.599, "dur": 6.618, + "args": { + "External id": 986843,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940334765.324, "dur": 5.190, + "args": { + "External id": 986844,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940334767.003, "dur": 3.351, + "args": { + "External id": 986845,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940334773.421, "dur": 44.976, + "args": { + "External id": 986846,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940334774.098, "dur": 43.520, + "args": { + "External id": 986847,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940334823.867, "dur": 3.807, + "args": { + "External id": 986848,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940334825.947, "dur": 0.429, + "args": { + "External id": 986849,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940334833.798, "dur": 1.643, + "args": { + "External id": 986850,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940334843.614, "dur": 8.927, + "args": { + "External id": 986851,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940334847.927, "dur": 4.280, + "args": { + "External id": 986852,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940334948.609, "dur": 271.248, + "args": { + "External id": 986853,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940334951.046, "dur": 1.989, + "args": { + "External id": 986854,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940334954.794, "dur": 264.548, + "args": { + "External id": 986855,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940334955.808, "dur": 0.289, + "args": { + "External id": 986856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940334957.585, "dur": 23.321, + "args": { + "External id": 986857,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940334982.588, "dur": 3.855, + "args": { + "External id": 986858,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940334985.444, "dur": 0.730, + "args": { + "External id": 986859,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940334987.288, "dur": 45.652, + "args": { + "External id": 986860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940334988.594, "dur": 1.286, + "args": { + "External id": 986861,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940334991.103, "dur": 41.034, + "args": { + "External id": 986862,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940334994.058, "dur": 2.002, + "args": { + "External id": 986863,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940335035.420, "dur": 68.406, + "args": { + "External id": 986864,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940335106.550, "dur": 19.091, + "args": { + "External id": 986865,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940335129.346, "dur": 16.101, + "args": { + "External id": 986866,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940335147.159, "dur": 15.486, + "args": { + "External id": 986867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940335164.762, "dur": 24.995, + "args": { + "External id": 986868,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940335167.418, "dur": 2.461, + "args": { + "External id": 986869,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940335172.281, "dur": 0.736, + "args": { + "External id": 986870,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940335191.583, "dur": 14.249, + "args": { + "External id": 986871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940335206.908, "dur": 11.007, + "args": { + "External id": 986872,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940335231.425, "dur": 2.563, + "args": { + "External id": 986873,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940335245.351, "dur": 4.175, + "args": { + "External id": 986874,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940335248.122, "dur": 0.416, + "args": { + "External id": 986875,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940335331.632, "dur": 65.723, + "args": { + "External id": 986876,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940335403.599, "dur": 5.764, + "args": { + "External id": 986877,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940335407.316, "dur": 0.736, + "args": { + "External id": 986878,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940335410.779, "dur": 27.704, + "args": { + "External id": 986879,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940335443.283, "dur": 7.693, + "args": { + "External id": 986880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940335445.000, "dur": 5.093, + "args": { + "External id": 986881,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940335449.069, "dur": 0.786, + "args": { + "External id": 986882,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940335454.363, "dur": 49.030, + "args": { + "External id": 986883,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940335455.316, "dur": 47.384, + "args": { + "External id": 986884,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940335507.858, "dur": 17.097, + "args": { + "External id": 986885,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940335531.800, "dur": 4.323, + "args": { + "External id": 986886,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940335534.480, "dur": 0.676, + "args": { + "External id": 986887,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940335542.148, "dur": 50.782, + "args": { + "External id": 986888,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940335543.220, "dur": 3.763, + "args": { + "External id": 986889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940335543.857, "dur": 2.451, + "args": { + "External id": 986890,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940335545.323, "dur": 0.775, + "args": { + "External id": 986891,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940335547.784, "dur": 44.680, + "args": { + "External id": 986892,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940335548.492, "dur": 43.360, + "args": { + "External id": 986893,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940335597.403, "dur": 6.940, + "args": { + "External id": 986894,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940335599.735, "dur": 3.135, + "args": { + "External id": 986895,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940335613.157, "dur": 1.412, + "args": { + "External id": 986896,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940335624.639, "dur": 6.391, + "args": { + "External id": 986897,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940335626.539, "dur": 4.184, + "args": { + "External id": 986898,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940335727.209, "dur": 212.951, + "args": { + "External id": 986899,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940335729.529, "dur": 2.223, + "args": { + "External id": 986900,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940335735.823, "dur": 203.763, + "args": { + "External id": 986901,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940335737.129, "dur": 0.496, + "args": { + "External id": 986902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940335739.052, "dur": 23.560, + "args": { + "External id": 986903,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940335764.521, "dur": 4.033, + "args": { + "External id": 986904,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940335767.454, "dur": 0.808, + "args": { + "External id": 986905,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940335769.458, "dur": 26.379, + "args": { + "External id": 986906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940335771.501, "dur": 1.656, + "args": { + "External id": 986907,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940335774.644, "dur": 20.797, + "args": { + "External id": 986908,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940335777.919, "dur": 3.297, + "args": { + "External id": 986909,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940335797.504, "dur": 26.448, + "args": { + "External id": 986910,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940335825.477, "dur": 15.607, + "args": { + "External id": 986911,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940335844.244, "dur": 18.091, + "args": { + "External id": 986912,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940335866.912, "dur": 15.190, + "args": { + "External id": 986913,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940335884.184, "dur": 23.566, + "args": { + "External id": 986914,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940335886.684, "dur": 1.787, + "args": { + "External id": 986915,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940335890.785, "dur": 0.772, + "args": { + "External id": 986916,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940335909.464, "dur": 14.708, + "args": { + "External id": 986917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940335925.128, "dur": 13.301, + "args": { + "External id": 986918,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940335947.124, "dur": 1.827, + "args": { + "External id": 986919,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940335958.465, "dur": 4.343, + "args": { + "External id": 986920,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940335961.252, "dur": 0.547, + "args": { + "External id": 986921,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940336091.267, "dur": 78.885, + "args": { + "External id": 986922,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940336180.461, "dur": 7.195, + "args": { + "External id": 986923,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940336184.637, "dur": 1.108, + "args": { + "External id": 986924,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940336189.285, "dur": 32.543, + "args": { + "External id": 986925,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940336227.528, "dur": 5.832, + "args": { + "External id": 986926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940336229.423, "dur": 3.114, + "args": { + "External id": 986927,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940336231.545, "dur": 0.763, + "args": { + "External id": 986928,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940336238.253, "dur": 47.855, + "args": { + "External id": 986929,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940336239.573, "dur": 45.708, + "args": { + "External id": 986930,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940336290.655, "dur": 17.580, + "args": { + "External id": 986931,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940336315.616, "dur": 3.776, + "args": { + "External id": 986932,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940336317.705, "dur": 0.643, + "args": { + "External id": 986933,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940336323.952, "dur": 50.123, + "args": { + "External id": 986934,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940336324.971, "dur": 5.696, + "args": { + "External id": 986935,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940336327.966, "dur": 2.096, + "args": { + "External id": 986936,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940336329.400, "dur": 0.509, + "args": { + "External id": 986937,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940336331.468, "dur": 42.151, + "args": { + "External id": 986938,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940336331.990, "dur": 40.909, + "args": { + "External id": 986939,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940336379.205, "dur": 4.211, + "args": { + "External id": 986940,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940336381.283, "dur": 0.681, + "args": { + "External id": 986941,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940336390.369, "dur": 1.583, + "args": { + "External id": 986942,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940336403.318, "dur": 6.934, + "args": { + "External id": 986943,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940336405.619, "dur": 4.353, + "args": { + "External id": 986944,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940336507.960, "dur": 209.610, + "args": { + "External id": 986945,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940336510.658, "dur": 4.254, + "args": { + "External id": 986946,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940336516.269, "dur": 200.830, + "args": { + "External id": 986947,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940336517.632, "dur": 0.592, + "args": { + "External id": 986948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940336521.730, "dur": 24.947, + "args": { + "External id": 986949,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940336548.553, "dur": 4.143, + "args": { + "External id": 986950,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940336551.523, "dur": 0.896, + "args": { + "External id": 986951,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940336553.596, "dur": 25.321, + "args": { + "External id": 986952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940336554.693, "dur": 1.390, + "args": { + "External id": 986953,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940336557.676, "dur": 20.857, + "args": { + "External id": 986954,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940336561.094, "dur": 2.426, + "args": { + "External id": 986955,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940336580.408, "dur": 22.903, + "args": { + "External id": 986956,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940336604.814, "dur": 17.314, + "args": { + "External id": 986957,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940336624.851, "dur": 15.642, + "args": { + "External id": 986958,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940336641.910, "dur": 17.653, + "args": { + "External id": 986959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940336661.505, "dur": 23.262, + "args": { + "External id": 986960,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940336664.025, "dur": 2.148, + "args": { + "External id": 986961,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940336668.372, "dur": 0.866, + "args": { + "External id": 986962,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940336686.437, "dur": 15.626, + "args": { + "External id": 986963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940336703.235, "dur": 12.703, + "args": { + "External id": 986964,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940336724.241, "dur": 1.961, + "args": { + "External id": 986965,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940336735.272, "dur": 4.520, + "args": { + "External id": 986966,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940336738.200, "dur": 0.667, + "args": { + "External id": 986967,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940336811.120, "dur": 53.648, + "args": { + "External id": 986968,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940336870.059, "dur": 5.369, + "args": { + "External id": 986969,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940336873.417, "dur": 0.747, + "args": { + "External id": 986970,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940336876.925, "dur": 26.722, + "args": { + "External id": 986971,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940336908.701, "dur": 5.597, + "args": { + "External id": 986972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940336910.314, "dur": 3.177, + "args": { + "External id": 986973,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940336912.538, "dur": 0.706, + "args": { + "External id": 986974,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940336916.655, "dur": 46.121, + "args": { + "External id": 986975,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940336917.967, "dur": 44.141, + "args": { + "External id": 986976,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940336966.820, "dur": 15.054, + "args": { + "External id": 986977,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940336987.292, "dur": 46.395, + "args": { + "External id": 986978,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940336991.144, "dur": 41.728, + "args": { + "External id": 986979,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940336996.443, "dur": 0.793, + "args": { + "External id": 986980,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345940337041.571, "dur": 75.862, + "args": { + "External id": 986981,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940337043.913, "dur": 73.158, + "args": { + "External id": 986982,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 19882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940337049.653, "dur": 42.719, + "args": { + "External id": 986983,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940337096.163, "dur": 20.300, + "args": { + "External id": 986984,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2338706, + "ts": 6345940337133.017, "dur": 7.140, + "args": { + "External id": 986985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2338706, + "ts": 6345940337136.391, "dur": 3.426, + "args": { + "External id": 986986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2338706, + "ts": 6345940337141.285, "dur": 2.905, + "args": { + "External id": 986987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2338706, + "ts": 6345940337143.515, "dur": 0.516, + "args": { + "External id": 986988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940337191.753, "dur": 28.188, + "args": { + "External id": 986989,"Sequence number": 10552679, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 19889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940337222.128, "dur": 15.400, + "args": { + "External id": 986990,"Sequence number": 10552680, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 19890 + } + }, + { + "ph": "s", "id": 240, "pid": 2338706, "tid": 2338706, "ts": 6345940337222.128, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940337244.739, "dur": 8.234, + "args": { + "External id": 986991,"Sequence number": 10552681, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 19891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940337249.723, "dur": 1.345, + "args": { + "External id": 986992,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2338706, + "ts": 6345940337256.168, "dur": 7.582, + "args": { + "External id": 986993,"Sequence number": 10552681, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 19893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940337261.594, "dur": 0.598, + "args": { + "External id": 986994,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940337265.256, "dur": 3.057, + "args": { + "External id": 986995,"Sequence number": 10552681, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 19895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940337267.281, "dur": 0.347, + "args": { + "External id": 986996,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 19896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940337273.059, "dur": 6.227, + "args": { + "External id": 986997,"Sequence number": 10552681, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19897 + } + }, + { + "ph": "s", "id": 239, "pid": 2338706, "tid": 2338706, "ts": 6345940337273.059, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940337276.968, "dur": 0.844, + "args": { + "External id": 986998,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940337282.742, "dur": 5.483, + "args": { + "External id": 986999,"Sequence number": 10552682, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19899 + } + }, + { + "ph": "s", "id": 238, "pid": 2338706, "tid": 2338706, "ts": 6345940337282.742, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940337286.988, "dur": 0.307, + "args": { + "External id": 987000,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2338706, + "ts": 6345940337289.597, "dur": 5.848, + "args": { + "External id": 987001,"Sequence number": 10552683, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 19901 + } + }, + { + "ph": "s", "id": 237, "pid": 2338706, "tid": 2338706, "ts": 6345940337289.597, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940337293.898, "dur": 0.558, + "args": { + "External id": 987002,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940337296.946, "dur": 7.613, + "args": { + "External id": 987003,"Sequence number": 10552684, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 19903 + } + }, + { + "ph": "s", "id": 236, "pid": 2338706, "tid": 2338706, "ts": 6345940337296.946, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940337300.355, "dur": 3.298, + "args": { + "External id": 987004,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 19904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345940337308.973, "dur": 37.264, + "args": { + "External id": 987005,"Sequence number": 10552685, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345940337312.542, "dur": 33.474, + "args": { + "External id": 987006,"Sequence number": 10552685, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940337315.598, "dur": 9.370, + "args": { + "External id": 987007,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 19907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940337318.890, "dur": 5.431, + "args": { + "External id": 987008,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940337325.982, "dur": 19.453, + "args": { + "External id": 987009,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 19909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940337374.479, "dur": 4.954, + "args": { + "External id": 987010,"Sequence number": 10552685, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 19910 + } + }, + { + "ph": "s", "id": 235, "pid": 2338706, "tid": 2338706, "ts": 6345940337374.479, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940337382.108, "dur": 1.149, + "args": { + "External id": 987011,"Sequence number": 10552686, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940337422.094, "dur": 46347.071, + "args": { + "External id": 987012,"Sequence number": 10552686, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 19912 + } + }, + { + "ph": "s", "id": 234, "pid": 2338706, "tid": 2338706, "ts": 6345940337422.094, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345940337440.288, "dur": 32.887, + "args": { + "External id": 987013,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345940337441.113, "dur": 31.825, + "args": { + "External id": 987014,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940337442.540, "dur": 5.860, + "args": { + "External id": 987015,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940337444.497, "dur": 3.463, + "args": { + "External id": 987016,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940337454.115, "dur": 18.325, + "args": { + "External id": 987017,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 19917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940337492.471, "dur": 30.294, + "args": { + "External id": 987018,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940337493.914, "dur": 6.631, + "args": { + "External id": 987019,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940337496.089, "dur": 4.058, + "args": { + "External id": 987020,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345940337502.343, "dur": 20.172, + "args": { + "External id": 987021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940337505.489, "dur": 16.544, + "args": { + "External id": 987022,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940337526.794, "dur": 22.202, + "args": { + "External id": 987023,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940337527.843, "dur": 5.042, + "args": { + "External id": 987024,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940337529.231, "dur": 3.385, + "args": { + "External id": 987025,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345940337533.433, "dur": 15.335, + "args": { + "External id": 987026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940337534.129, "dur": 14.240, + "args": { + "External id": 987027,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 19927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2338706, + "ts": 6345940337556.082, "dur": 23.287, + "args": { + "External id": 987028,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940337558.241, "dur": 3.445, + "args": { + "External id": 987029,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345940337562.422, "dur": 16.550, + "args": { + "External id": 987030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 19930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940337565.093, "dur": 13.509, + "args": { + "External id": 987031,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2338706, + "ts": 6345940337584.962, "dur": 29.665, + "args": { + "External id": 987032,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940337617.949, "dur": 56.875, + "args": { + "External id": 987033,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940337620.482, "dur": 53.907, + "args": { + "External id": 987034,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940337626.558, "dur": 0.906, + "args": { + "External id": 987035,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345940337629.076, "dur": 26.664, + "args": { + "External id": 987036,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940337631.293, "dur": 24.223, + "args": { + "External id": 987037,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 19937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940337634.444, "dur": 2.996, + "args": { + "External id": 987038,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940337638.163, "dur": 16.935, + "args": { + "External id": 987039,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 19939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2338706, + "ts": 6345940337678.756, "dur": 39292.760, + "args": { + "External id": 987040,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2338706, + "ts": 6345940337680.787, "dur": 39289.000, + "args": { + "External id": 987041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940376987.103, "dur": 7.966, + "args": { + "External id": 987042,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940376991.793, "dur": 1.202, + "args": { + "External id": 987043,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940377003.598, "dur": 179.115, + "args": { + "External id": 987044,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940377005.470, "dur": 20.632, + "args": { + "External id": 987045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940377020.669, "dur": 4.247, + "args": { + "External id": 987046,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940377023.105, "dur": 1.306, + "args": { + "External id": 987047,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940377027.759, "dur": 154.041, + "args": { + "External id": 987048,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940377030.090, "dur": 150.070, + "args": { + "External id": 987049,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940377190.957, "dur": 9.163, + "args": { + "External id": 987050,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940377195.875, "dur": 1.035, + "args": { + "External id": 987051,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940377212.002, "dur": 3.798, + "args": { + "External id": 987052,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940377229.331, "dur": 9.163, + "args": { + "External id": 987053,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940377233.047, "dur": 5.103, + "args": { + "External id": 987054,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940377427.072, "dur": 258.939, + "args": { + "External id": 987055,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940377431.248, "dur": 2.702, + "args": { + "External id": 987056,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940377439.047, "dur": 246.325, + "args": { + "External id": 987057,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940377442.324, "dur": 0.516, + "args": { + "External id": 987058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940377445.033, "dur": 37.578, + "args": { + "External id": 987059,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940377485.059, "dur": 5.779, + "args": { + "External id": 987060,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940377489.617, "dur": 0.801, + "args": { + "External id": 987061,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940377492.144, "dur": 33.061, + "args": { + "External id": 987062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940377494.605, "dur": 1.492, + "args": { + "External id": 987063,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940377498.160, "dur": 26.647, + "args": { + "External id": 987064,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940377503.664, "dur": 4.658, + "args": { + "External id": 987065,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940377528.320, "dur": 28.289, + "args": { + "External id": 987066,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940377559.748, "dur": 18.761, + "args": { + "External id": 987067,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940377583.691, "dur": 18.229, + "args": { + "External id": 987068,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940377604.005, "dur": 17.313, + "args": { + "External id": 987069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940377624.085, "dur": 26.576, + "args": { + "External id": 987070,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940377627.320, "dur": 1.650, + "args": { + "External id": 987071,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940377632.562, "dur": 0.598, + "args": { + "External id": 987072,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940377653.880, "dur": 15.160, + "args": { + "External id": 987073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940377670.571, "dur": 13.612, + "args": { + "External id": 987074,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940377696.569, "dur": 2.376, + "args": { + "External id": 987075,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940377708.808, "dur": 5.493, + "args": { + "External id": 987076,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940377712.525, "dur": 0.466, + "args": { + "External id": 987077,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940377813.659, "dur": 83.695, + "args": { + "External id": 987078,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940377904.440, "dur": 8.827, + "args": { + "External id": 987079,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940377908.158, "dur": 2.254, + "args": { + "External id": 987080,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940377915.446, "dur": 36.674, + "args": { + "External id": 987081,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940377958.779, "dur": 10.083, + "args": { + "External id": 987082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940377961.104, "dur": 6.762, + "args": { + "External id": 987083,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940377966.441, "dur": 1.128, + "args": { + "External id": 987084,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940377973.874, "dur": 78.215, + "args": { + "External id": 987085,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940377975.110, "dur": 75.580, + "args": { + "External id": 987086,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940378096.520, "dur": 25.360, + "args": { + "External id": 987087,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940378137.923, "dur": 17.606, + "args": { + "External id": 987088,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940378151.678, "dur": 0.915, + "args": { + "External id": 987089,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940378161.173, "dur": 66.835, + "args": { + "External id": 987090,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940378164.466, "dur": 6.213, + "args": { + "External id": 987091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940378166.640, "dur": 3.324, + "args": { + "External id": 987092,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940378168.714, "dur": 1.040, + "args": { + "External id": 987093,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940378171.923, "dur": 55.355, + "args": { + "External id": 987094,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940378173.210, "dur": 53.241, + "args": { + "External id": 987095,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940378233.003, "dur": 5.031, + "args": { + "External id": 987096,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940378235.673, "dur": 0.700, + "args": { + "External id": 987097,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940378247.806, "dur": 2.137, + "args": { + "External id": 987098,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940378262.685, "dur": 8.935, + "args": { + "External id": 987099,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940378265.006, "dur": 6.274, + "args": { + "External id": 987100,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940378398.006, "dur": 236.027, + "args": { + "External id": 987101,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940378402.290, "dur": 1.930, + "args": { + "External id": 987102,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940378407.880, "dur": 225.516, + "args": { + "External id": 987103,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940378409.854, "dur": 0.791, + "args": { + "External id": 987104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940378412.246, "dur": 28.966, + "args": { + "External id": 987105,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940378443.457, "dur": 5.963, + "args": { + "External id": 987106,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940378446.148, "dur": 2.921, + "args": { + "External id": 987107,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940378450.560, "dur": 29.670, + "args": { + "External id": 987108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940378452.872, "dur": 1.315, + "args": { + "External id": 987109,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940378455.821, "dur": 24.050, + "args": { + "External id": 987110,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940378459.818, "dur": 3.145, + "args": { + "External id": 987111,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940378482.058, "dur": 27.202, + "args": { + "External id": 987112,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940378511.279, "dur": 19.181, + "args": { + "External id": 987113,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940378533.341, "dur": 18.742, + "args": { + "External id": 987114,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940378553.877, "dur": 17.177, + "args": { + "External id": 987115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940378573.894, "dur": 26.839, + "args": { + "External id": 987116,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940378576.649, "dur": 2.581, + "args": { + "External id": 987117,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940378581.680, "dur": 0.829, + "args": { + "External id": 987118,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940378602.485, "dur": 15.329, + "args": { + "External id": 987119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940378618.894, "dur": 13.140, + "args": { + "External id": 987120,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940378641.341, "dur": 2.069, + "args": { + "External id": 987121,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940378655.195, "dur": 5.413, + "args": { + "External id": 987122,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940378659.171, "dur": 0.465, + "args": { + "External id": 987123,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940378742.341, "dur": 66.511, + "args": { + "External id": 987124,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940378815.350, "dur": 5.091, + "args": { + "External id": 987125,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940378818.475, "dur": 0.739, + "args": { + "External id": 987126,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940378822.237, "dur": 30.053, + "args": { + "External id": 987127,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940378857.769, "dur": 7.048, + "args": { + "External id": 987128,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940378859.721, "dur": 4.260, + "args": { + "External id": 987129,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940378863.036, "dur": 0.716, + "args": { + "External id": 987130,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940378868.305, "dur": 48.598, + "args": { + "External id": 987131,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940378869.262, "dur": 46.927, + "args": { + "External id": 987132,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940378921.195, "dur": 15.102, + "args": { + "External id": 987133,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940378944.889, "dur": 4.614, + "args": { + "External id": 987134,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940378947.530, "dur": 0.785, + "args": { + "External id": 987135,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940378954.024, "dur": 178.900, + "args": { + "External id": 987136,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940379006.100, "dur": 25.323, + "args": { + "External id": 987137,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940379024.860, "dur": 5.530, + "args": { + "External id": 987138,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940379029.028, "dur": 1.029, + "args": { + "External id": 987139,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940379034.722, "dur": 97.391, + "args": { + "External id": 987140,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940379035.738, "dur": 94.698, + "args": { + "External id": 987141,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940379142.198, "dur": 7.427, + "args": { + "External id": 987142,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940379145.323, "dur": 2.502, + "args": { + "External id": 987143,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940379159.045, "dur": 2.078, + "args": { + "External id": 987144,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940379172.112, "dur": 8.370, + "args": { + "External id": 987145,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940379174.722, "dur": 5.465, + "args": { + "External id": 987146,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940379303.328, "dur": 209.259, + "args": { + "External id": 987147,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940379307.070, "dur": 2.133, + "args": { + "External id": 987148,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940379310.770, "dur": 201.185, + "args": { + "External id": 987149,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940379312.371, "dur": 0.512, + "args": { + "External id": 987150,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940379314.647, "dur": 29.788, + "args": { + "External id": 987151,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940379346.524, "dur": 4.497, + "args": { + "External id": 987152,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940379349.738, "dur": 1.009, + "args": { + "External id": 987153,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940379352.210, "dur": 25.924, + "args": { + "External id": 987154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940379353.816, "dur": 1.579, + "args": { + "External id": 987155,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940379357.342, "dur": 20.358, + "args": { + "External id": 987156,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940379361.020, "dur": 3.505, + "args": { + "External id": 987157,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940379379.930, "dur": 26.898, + "args": { + "External id": 987158,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940379408.547, "dur": 16.368, + "args": { + "External id": 987159,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940379428.292, "dur": 15.364, + "args": { + "External id": 987160,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940379445.156, "dur": 14.384, + "args": { + "External id": 987161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940379461.797, "dur": 21.948, + "args": { + "External id": 987162,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940379464.660, "dur": 1.647, + "args": { + "External id": 987163,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940379468.389, "dur": 0.746, + "args": { + "External id": 987164,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940379485.858, "dur": 12.679, + "args": { + "External id": 987165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940379499.655, "dur": 11.186, + "args": { + "External id": 987166,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940379520.139, "dur": 2.225, + "args": { + "External id": 987167,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940379535.276, "dur": 5.307, + "args": { + "External id": 987168,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940379539.021, "dur": 0.528, + "args": { + "External id": 987169,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940379620.045, "dur": 62.065, + "args": { + "External id": 987170,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940379688.211, "dur": 5.232, + "args": { + "External id": 987171,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940379691.403, "dur": 0.832, + "args": { + "External id": 987172,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940379695.131, "dur": 30.192, + "args": { + "External id": 987173,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940379730.451, "dur": 8.530, + "args": { + "External id": 987174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940379732.361, "dur": 5.627, + "args": { + "External id": 987175,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940379737.158, "dur": 0.649, + "args": { + "External id": 987176,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940379742.503, "dur": 48.890, + "args": { + "External id": 987177,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940379743.621, "dur": 46.886, + "args": { + "External id": 987178,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940379795.817, "dur": 18.760, + "args": { + "External id": 987179,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940379821.575, "dur": 3.812, + "args": { + "External id": 987180,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940379823.864, "dur": 0.552, + "args": { + "External id": 987181,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940379832.800, "dur": 50.730, + "args": { + "External id": 987182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940379834.091, "dur": 3.933, + "args": { + "External id": 987183,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940379834.735, "dur": 2.560, + "args": { + "External id": 987184,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940379836.404, "dur": 0.722, + "args": { + "External id": 987185,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940379838.709, "dur": 44.395, + "args": { + "External id": 987186,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940379839.965, "dur": 42.405, + "args": { + "External id": 987187,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940379890.195, "dur": 3.980, + "args": { + "External id": 987188,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940379892.254, "dur": 0.624, + "args": { + "External id": 987189,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940379900.398, "dur": 1.541, + "args": { + "External id": 987190,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940379910.617, "dur": 5.577, + "args": { + "External id": 987191,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940379912.380, "dur": 3.508, + "args": { + "External id": 987192,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940380031.105, "dur": 263.205, + "args": { + "External id": 987193,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940380034.995, "dur": 5.825, + "args": { + "External id": 987194,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940380042.922, "dur": 250.851, + "args": { + "External id": 987195,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940380044.279, "dur": 0.496, + "args": { + "External id": 987196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940380046.145, "dur": 67.121, + "args": { + "External id": 987197,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940380116.566, "dur": 4.972, + "args": { + "External id": 987198,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940380120.048, "dur": 1.191, + "args": { + "External id": 987199,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940380122.517, "dur": 28.799, + "args": { + "External id": 987200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940380124.183, "dur": 2.087, + "args": { + "External id": 987201,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940380127.806, "dur": 22.993, + "args": { + "External id": 987202,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940380131.946, "dur": 3.255, + "args": { + "External id": 987203,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940380152.767, "dur": 25.122, + "args": { + "External id": 987204,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940380179.475, "dur": 17.524, + "args": { + "External id": 987205,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940380200.121, "dur": 16.877, + "args": { + "External id": 987206,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940380218.586, "dur": 16.309, + "args": { + "External id": 987207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940380237.402, "dur": 24.383, + "args": { + "External id": 987208,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940380239.775, "dur": 1.875, + "args": { + "External id": 987209,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940380244.000, "dur": 0.855, + "args": { + "External id": 987210,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940380263.215, "dur": 14.815, + "args": { + "External id": 987211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940380279.365, "dur": 13.051, + "args": { + "External id": 987212,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940380303.268, "dur": 2.696, + "args": { + "External id": 987213,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940380317.495, "dur": 4.645, + "args": { + "External id": 987214,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940380320.450, "dur": 0.676, + "args": { + "External id": 987215,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940380403.865, "dur": 66.543, + "args": { + "External id": 987216,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940380476.533, "dur": 5.480, + "args": { + "External id": 987217,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940380480.060, "dur": 0.679, + "args": { + "External id": 987218,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940380483.431, "dur": 30.477, + "args": { + "External id": 987219,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940380518.945, "dur": 6.566, + "args": { + "External id": 987220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940380521.118, "dur": 3.616, + "args": { + "External id": 987221,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940380523.835, "dur": 0.670, + "args": { + "External id": 987222,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940380528.555, "dur": 45.889, + "args": { + "External id": 987223,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940380529.618, "dur": 43.852, + "args": { + "External id": 987224,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940380579.002, "dur": 17.452, + "args": { + "External id": 987225,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940380605.386, "dur": 4.292, + "args": { + "External id": 987226,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940380607.926, "dur": 0.827, + "args": { + "External id": 987227,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940380613.980, "dur": 55.762, + "args": { + "External id": 987228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940380615.583, "dur": 6.071, + "args": { + "External id": 987229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940380616.215, "dur": 4.805, + "args": { + "External id": 987230,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940380620.346, "dur": 0.536, + "args": { + "External id": 987231,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940380622.544, "dur": 46.761, + "args": { + "External id": 987232,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940380623.660, "dur": 45.010, + "args": { + "External id": 987233,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940380674.839, "dur": 4.212, + "args": { + "External id": 987234,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940380677.211, "dur": 0.610, + "args": { + "External id": 987235,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940380685.630, "dur": 1.701, + "args": { + "External id": 987236,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940380696.057, "dur": 8.153, + "args": { + "External id": 987237,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940380700.000, "dur": 3.923, + "args": { + "External id": 987238,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940380803.447, "dur": 234.248, + "args": { + "External id": 987239,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940380805.984, "dur": 2.210, + "args": { + "External id": 987240,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940380810.090, "dur": 226.985, + "args": { + "External id": 987241,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940380811.445, "dur": 0.520, + "args": { + "External id": 987242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940380813.570, "dur": 27.758, + "args": { + "External id": 987243,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940380843.083, "dur": 3.868, + "args": { + "External id": 987244,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940380845.901, "dur": 0.827, + "args": { + "External id": 987245,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940380847.977, "dur": 28.658, + "args": { + "External id": 987246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940380849.121, "dur": 1.350, + "args": { + "External id": 987247,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940380852.099, "dur": 24.164, + "args": { + "External id": 987248,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940380855.651, "dur": 2.907, + "args": { + "External id": 987249,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940380878.550, "dur": 24.324, + "args": { + "External id": 987250,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940380904.635, "dur": 16.651, + "args": { + "External id": 987251,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940380924.755, "dur": 15.685, + "args": { + "External id": 987252,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940380941.660, "dur": 13.721, + "args": { + "External id": 987253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940380957.465, "dur": 22.763, + "args": { + "External id": 987254,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940380960.270, "dur": 1.934, + "args": { + "External id": 987255,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940380964.878, "dur": 1.012, + "args": { + "External id": 987256,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940380988.888, "dur": 12.149, + "args": { + "External id": 987257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940381002.345, "dur": 32.502, + "args": { + "External id": 987258,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940381048.266, "dur": 2.935, + "args": { + "External id": 987259,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940381102.760, "dur": 7.844, + "args": { + "External id": 987260,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940381108.101, "dur": 0.989, + "args": { + "External id": 987261,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940381203.683, "dur": 69.252, + "args": { + "External id": 987262,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940381278.887, "dur": 5.789, + "args": { + "External id": 987263,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940381282.570, "dur": 0.785, + "args": { + "External id": 987264,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940381286.167, "dur": 30.555, + "args": { + "External id": 987265,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940381321.904, "dur": 9.340, + "args": { + "External id": 987266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940381324.099, "dur": 6.356, + "args": { + "External id": 987267,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940381329.046, "dur": 1.160, + "args": { + "External id": 987268,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940381334.216, "dur": 46.539, + "args": { + "External id": 987269,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940381335.240, "dur": 44.655, + "args": { + "External id": 987270,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940381385.212, "dur": 19.048, + "args": { + "External id": 987271,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940381411.262, "dur": 4.804, + "args": { + "External id": 987272,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940381414.125, "dur": 0.865, + "args": { + "External id": 987273,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940381420.510, "dur": 55.940, + "args": { + "External id": 987274,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940381423.895, "dur": 7.786, + "args": { + "External id": 987275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940381428.335, "dur": 2.678, + "args": { + "External id": 987276,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940381430.024, "dur": 0.830, + "args": { + "External id": 987277,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940381432.295, "dur": 43.478, + "args": { + "External id": 987278,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940381433.156, "dur": 41.894, + "args": { + "External id": 987279,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940381480.636, "dur": 4.220, + "args": { + "External id": 987280,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940381482.922, "dur": 0.654, + "args": { + "External id": 987281,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940381493.163, "dur": 1.901, + "args": { + "External id": 987282,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940381504.555, "dur": 9.953, + "args": { + "External id": 987283,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940381506.763, "dur": 7.425, + "args": { + "External id": 987284,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940381612.243, "dur": 213.061, + "args": { + "External id": 987285,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940381614.448, "dur": 2.456, + "args": { + "External id": 987286,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940381620.290, "dur": 204.447, + "args": { + "External id": 987287,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940381621.988, "dur": 0.512, + "args": { + "External id": 987288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940381624.245, "dur": 25.441, + "args": { + "External id": 987289,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940381652.216, "dur": 3.684, + "args": { + "External id": 987290,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940381654.807, "dur": 0.763, + "args": { + "External id": 987291,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940381657.141, "dur": 25.706, + "args": { + "External id": 987292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940381658.782, "dur": 1.475, + "args": { + "External id": 987293,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940381661.720, "dur": 20.710, + "args": { + "External id": 987294,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940381665.068, "dur": 2.782, + "args": { + "External id": 987295,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940381684.561, "dur": 24.934, + "args": { + "External id": 987296,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940381711.464, "dur": 17.292, + "args": { + "External id": 987297,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940381731.462, "dur": 17.359, + "args": { + "External id": 987298,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940381750.614, "dur": 16.361, + "args": { + "External id": 987299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940381769.045, "dur": 23.919, + "args": { + "External id": 987300,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940381771.684, "dur": 2.047, + "args": { + "External id": 987301,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940381776.363, "dur": 0.546, + "args": { + "External id": 987302,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940381794.811, "dur": 14.435, + "args": { + "External id": 987303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940381810.406, "dur": 12.993, + "args": { + "External id": 987304,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940381831.978, "dur": 1.810, + "args": { + "External id": 987305,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940381843.963, "dur": 4.280, + "args": { + "External id": 987306,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940381846.690, "dur": 0.518, + "args": { + "External id": 987307,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940381921.271, "dur": 56.189, + "args": { + "External id": 987308,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940381982.843, "dur": 4.966, + "args": { + "External id": 987309,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940381985.926, "dur": 0.765, + "args": { + "External id": 987310,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940381989.317, "dur": 48.359, + "args": { + "External id": 987311,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940382044.659, "dur": 7.257, + "args": { + "External id": 987312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940382046.527, "dur": 4.529, + "args": { + "External id": 987313,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940382049.898, "dur": 0.936, + "args": { + "External id": 987314,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940382055.128, "dur": 91.943, + "args": { + "External id": 987315,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940382091.768, "dur": 54.485, + "args": { + "External id": 987316,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940382153.628, "dur": 19.349, + "args": { + "External id": 987317,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940382180.794, "dur": 5.275, + "args": { + "External id": 987318,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940382184.065, "dur": 0.754, + "args": { + "External id": 987319,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940382190.998, "dur": 55.342, + "args": { + "External id": 987320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940382192.099, "dur": 5.975, + "args": { + "External id": 987321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940382194.987, "dur": 2.432, + "args": { + "External id": 987322,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940382196.795, "dur": 0.462, + "args": { + "External id": 987323,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940382198.953, "dur": 46.931, + "args": { + "External id": 987324,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940382199.525, "dur": 45.801, + "args": { + "External id": 987325,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940382250.572, "dur": 4.015, + "args": { + "External id": 987326,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940382252.685, "dur": 0.495, + "args": { + "External id": 987327,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940382261.615, "dur": 1.762, + "args": { + "External id": 987328,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940382274.933, "dur": 6.871, + "args": { + "External id": 987329,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940382277.471, "dur": 4.053, + "args": { + "External id": 987330,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940382379.112, "dur": 223.417, + "args": { + "External id": 987331,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940382381.930, "dur": 2.163, + "args": { + "External id": 987332,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940382385.608, "dur": 216.335, + "args": { + "External id": 987333,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940382386.879, "dur": 0.665, + "args": { + "External id": 987334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940382388.995, "dur": 31.164, + "args": { + "External id": 987335,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940382422.256, "dur": 3.902, + "args": { + "External id": 987336,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940382425.002, "dur": 0.876, + "args": { + "External id": 987337,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940382427.296, "dur": 28.355, + "args": { + "External id": 987338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940382428.496, "dur": 1.465, + "args": { + "External id": 987339,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940382431.506, "dur": 23.769, + "args": { + "External id": 987340,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940382436.654, "dur": 3.212, + "args": { + "External id": 987341,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940382457.377, "dur": 24.871, + "args": { + "External id": 987342,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940382483.886, "dur": 22.093, + "args": { + "External id": 987343,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940382509.648, "dur": 16.937, + "args": { + "External id": 987344,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940382528.047, "dur": 15.249, + "args": { + "External id": 987345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940382545.523, "dur": 23.039, + "args": { + "External id": 987346,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940382548.200, "dur": 1.558, + "args": { + "External id": 987347,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940382551.861, "dur": 0.800, + "args": { + "External id": 987348,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940382570.258, "dur": 16.028, + "args": { + "External id": 987349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940382587.251, "dur": 13.602, + "args": { + "External id": 987350,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940382610.160, "dur": 1.661, + "args": { + "External id": 987351,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940382622.256, "dur": 4.700, + "args": { + "External id": 987352,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940382625.202, "dur": 0.758, + "args": { + "External id": 987353,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940382701.258, "dur": 54.825, + "args": { + "External id": 987354,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940382762.120, "dur": 5.679, + "args": { + "External id": 987355,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940382765.724, "dur": 0.911, + "args": { + "External id": 987356,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940382769.719, "dur": 26.967, + "args": { + "External id": 987357,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940382801.722, "dur": 5.843, + "args": { + "External id": 987358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940382803.534, "dur": 3.333, + "args": { + "External id": 987359,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940382805.709, "dur": 0.967, + "args": { + "External id": 987360,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940382810.331, "dur": 44.143, + "args": { + "External id": 987361,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940382811.449, "dur": 42.340, + "args": { + "External id": 987362,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940382859.466, "dur": 15.870, + "args": { + "External id": 987363,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940382881.369, "dur": 4.516, + "args": { + "External id": 987364,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940382884.167, "dur": 0.698, + "args": { + "External id": 987365,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940382889.781, "dur": 50.835, + "args": { + "External id": 987366,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940382890.855, "dur": 4.219, + "args": { + "External id": 987367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940382891.866, "dur": 2.542, + "args": { + "External id": 987368,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940382893.470, "dur": 0.776, + "args": { + "External id": 987369,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940382895.806, "dur": 44.374, + "args": { + "External id": 987370,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940382898.887, "dur": 40.595, + "args": { + "External id": 987371,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940382945.054, "dur": 4.458, + "args": { + "External id": 987372,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940382947.596, "dur": 0.710, + "args": { + "External id": 987373,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940382955.198, "dur": 1.449, + "args": { + "External id": 987374,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940382964.269, "dur": 10.578, + "args": { + "External id": 987375,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940382968.071, "dur": 6.413, + "args": { + "External id": 987376,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940383127.375, "dur": 210.397, + "args": { + "External id": 987377,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940383130.033, "dur": 4.084, + "args": { + "External id": 987378,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940383136.048, "dur": 201.169, + "args": { + "External id": 987379,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940383137.975, "dur": 0.515, + "args": { + "External id": 987380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940383140.150, "dur": 27.098, + "args": { + "External id": 987381,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940383169.842, "dur": 4.482, + "args": { + "External id": 987382,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940383172.671, "dur": 1.350, + "args": { + "External id": 987383,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940383175.665, "dur": 24.711, + "args": { + "External id": 987384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940383177.277, "dur": 1.494, + "args": { + "External id": 987385,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940383180.348, "dur": 19.716, + "args": { + "External id": 987386,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940383183.669, "dur": 3.079, + "args": { + "External id": 987387,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940383202.254, "dur": 23.401, + "args": { + "External id": 987388,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940383227.275, "dur": 15.493, + "args": { + "External id": 987389,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940383245.961, "dur": 14.826, + "args": { + "External id": 987390,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940383262.243, "dur": 13.744, + "args": { + "External id": 987391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940383281.436, "dur": 23.793, + "args": { + "External id": 987392,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940383283.989, "dur": 2.072, + "args": { + "External id": 987393,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940383289.526, "dur": 0.678, + "args": { + "External id": 987394,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940383309.121, "dur": 13.054, + "args": { + "External id": 987395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940383323.662, "dur": 12.486, + "args": { + "External id": 987396,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940383345.501, "dur": 2.451, + "args": { + "External id": 987397,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940383359.832, "dur": 4.774, + "args": { + "External id": 987398,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940383362.949, "dur": 0.693, + "args": { + "External id": 987399,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940383445.838, "dur": 70.285, + "args": { + "External id": 987400,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940383521.745, "dur": 5.324, + "args": { + "External id": 987401,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940383525.096, "dur": 0.793, + "args": { + "External id": 987402,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940383528.736, "dur": 31.284, + "args": { + "External id": 987403,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940383566.928, "dur": 6.719, + "args": { + "External id": 987404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940383569.363, "dur": 3.533, + "args": { + "External id": 987405,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940383571.769, "dur": 0.887, + "args": { + "External id": 987406,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940383577.015, "dur": 48.351, + "args": { + "External id": 987407,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940383578.150, "dur": 46.392, + "args": { + "External id": 987408,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940383629.791, "dur": 16.489, + "args": { + "External id": 987409,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940383651.358, "dur": 27.702, + "args": { + "External id": 987410,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940383656.237, "dur": 22.371, + "args": { + "External id": 987411,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940383662.578, "dur": 0.914, + "args": { + "External id": 987412,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 20312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345940383684.863, "dur": 30.616, + "args": { + "External id": 987413,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940383686.998, "dur": 28.233, + "args": { + "External id": 987414,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 20314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940383691.917, "dur": 3.776, + "args": { + "External id": 987415,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940383696.727, "dur": 17.960, + "args": { + "External id": 987416,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2338706, + "ts": 6345940383727.867, "dur": 8.883, + "args": { + "External id": 987417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2338706, + "ts": 6345940383733.023, "dur": 3.327, + "args": { + "External id": 987418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2338706, + "ts": 6345940383738.120, "dur": 1.643, + "args": { + "External id": 987419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2338706, + "ts": 6345940383738.650, "dur": 0.791, + "args": { + "External id": 987420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940383790.193, "dur": 30.215, + "args": { + "External id": 987421,"Sequence number": 10552687, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940383822.712, "dur": 15.008, + "args": { + "External id": 987422,"Sequence number": 10552688, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20322 + } + }, + { + "ph": "s", "id": 233, "pid": 2338706, "tid": 2338706, "ts": 6345940383822.712, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940383843.973, "dur": 7.274, + "args": { + "External id": 987423,"Sequence number": 10552689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 20323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940383848.447, "dur": 1.175, + "args": { + "External id": 987424,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 20324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2338706, + "ts": 6345940383854.277, "dur": 7.140, + "args": { + "External id": 987425,"Sequence number": 10552689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "3"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 20325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940383859.104, "dur": 0.746, + "args": { + "External id": 987426,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 20326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940383862.973, "dur": 5.327, + "args": { + "External id": 987427,"Sequence number": 10552689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 20327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940383867.101, "dur": 0.547, + "args": { + "External id": 987428,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 20328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940383873.106, "dur": 6.969, + "args": { + "External id": 987429,"Sequence number": 10552689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 20329 + } + }, + { + "ph": "s", "id": 232, "pid": 2338706, "tid": 2338706, "ts": 6345940383873.106, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940383877.419, "dur": 1.025, + "args": { + "External id": 987430,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 20330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940383881.157, "dur": 5.346, + "args": { + "External id": 987431,"Sequence number": 10552690, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 20331 + } + }, + { + "ph": "s", "id": 231, "pid": 2338706, "tid": 2338706, "ts": 6345940383881.157, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940383885.044, "dur": 0.527, + "args": { + "External id": 987432,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 20332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338706, "tid": 2338706, + "ts": 6345940383889.782, "dur": 8.624, + "args": { + "External id": 987433,"Sequence number": 10552691, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 20333 + } + }, + { + "ph": "s", "id": 230, "pid": 2338706, "tid": 2338706, "ts": 6345940383889.782, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940383894.124, "dur": 3.280, + "args": { + "External id": 987434,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 20334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940383899.888, "dur": 4.758, + "args": { + "External id": 987435,"Sequence number": 10552692, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 20335 + } + }, + { + "ph": "s", "id": 229, "pid": 2338706, "tid": 2338706, "ts": 6345940383899.888, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940383902.897, "dur": 0.763, + "args": { + "External id": 987436,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 20336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345940383908.527, "dur": 38.522, + "args": { + "External id": 987437,"Sequence number": 10552693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 20337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345940383910.614, "dur": 36.193, + "args": { + "External id": 987438,"Sequence number": 10552693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 20338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940383913.692, "dur": 7.241, + "args": { + "External id": 987439,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 20339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940383916.250, "dur": 4.049, + "args": { + "External id": 987440,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940383924.487, "dur": 21.701, + "args": { + "External id": 987441,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 20341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940383976.317, "dur": 4.213, + "args": { + "External id": 987442,"Sequence number": 10552693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 20342 + } + }, + { + "ph": "s", "id": 228, "pid": 2338706, "tid": 2338706, "ts": 6345940383976.317, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940383983.206, "dur": 1.030, + "args": { + "External id": 987443,"Sequence number": 10552694, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 20343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338706, "tid": 2338706, + "ts": 6345940384042.795, "dur": 46675.065, + "args": { + "External id": 987444,"Sequence number": 10552694, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 20344 + } + }, + { + "ph": "s", "id": 227, "pid": 2338706, "tid": 2338706, "ts": 6345940384042.795, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338706, "tid": 2338706, + "ts": 6345940384104.497, "dur": 40.397, + "args": { + "External id": 987445,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 20345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345940384105.830, "dur": 38.703, + "args": { + "External id": 987446,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 20346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940384108.252, "dur": 11.089, + "args": { + "External id": 987447,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 20347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940384112.677, "dur": 5.727, + "args": { + "External id": 987448,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940384120.273, "dur": 23.752, + "args": { + "External id": 987449,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 20349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940384165.192, "dur": 28.947, + "args": { + "External id": 987450,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 20350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940384166.531, "dur": 7.387, + "args": { + "External id": 987451,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 20351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940384168.976, "dur": 4.594, + "args": { + "External id": 987452,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345940384175.619, "dur": 18.245, + "args": { + "External id": 987453,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940384177.517, "dur": 15.858, + "args": { + "External id": 987454,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 20354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940384197.872, "dur": 25.059, + "args": { + "External id": 987455,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940384198.963, "dur": 4.704, + "args": { + "External id": 987456,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940384200.331, "dur": 3.073, + "args": { + "External id": 987457,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345940384207.188, "dur": 15.525, + "args": { + "External id": 987458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940384208.305, "dur": 14.020, + "args": { + "External id": 987459,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 20359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2338706, + "ts": 6345940384229.870, "dur": 21.496, + "args": { + "External id": 987460,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 20360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940384231.800, "dur": 5.745, + "args": { + "External id": 987461,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345940384238.194, "dur": 12.838, + "args": { + "External id": 987462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 20362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940384238.958, "dur": 11.648, + "args": { + "External id": 987463,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2338706, + "ts": 6345940384257.338, "dur": 24.173, + "args": { + "External id": 987464,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940384284.697, "dur": 53.708, + "args": { + "External id": 987465,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940384289.126, "dur": 48.699, + "args": { + "External id": 987466,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940384294.750, "dur": 1.102, + "args": { + "External id": 987467,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 20367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345940384297.150, "dur": 23.231, + "args": { + "External id": 987468,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940384299.187, "dur": 20.925, + "args": { + "External id": 987469,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 20369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940384301.963, "dur": 3.453, + "args": { + "External id": 987470,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940384306.272, "dur": 13.375, + "args": { + "External id": 987471,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 20371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2338706, + "ts": 6345940384343.496, "dur": 39753.384, + "args": { + "External id": 987472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 20372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2338706, + "ts": 6345940384344.955, "dur": 39750.878, + "args": { + "External id": 987473,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 20373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940424109.350, "dur": 8.625, + "args": { + "External id": 987474,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940424114.679, "dur": 1.288, + "args": { + "External id": 987475,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940424126.204, "dur": 114.008, + "args": { + "External id": 987476,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940424127.897, "dur": 6.707, + "args": { + "External id": 987477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940424130.309, "dur": 3.304, + "args": { + "External id": 987478,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940424132.507, "dur": 0.813, + "args": { + "External id": 987479,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940424136.107, "dur": 103.226, + "args": { + "External id": 987480,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940424138.268, "dur": 100.152, + "args": { + "External id": 987481,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940424245.627, "dur": 6.177, + "args": { + "External id": 987482,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940424249.219, "dur": 0.693, + "args": { + "External id": 987483,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940424263.054, "dur": 3.813, + "args": { + "External id": 987484,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940424278.294, "dur": 8.623, + "args": { + "External id": 987485,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940424281.713, "dur": 4.841, + "args": { + "External id": 987486,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940424458.705, "dur": 248.365, + "args": { + "External id": 987487,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940424462.919, "dur": 5.742, + "args": { + "External id": 987488,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940424474.415, "dur": 230.823, + "args": { + "External id": 987489,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940424476.168, "dur": 0.979, + "args": { + "External id": 987490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940424479.783, "dur": 31.049, + "args": { + "External id": 987491,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940424514.024, "dur": 3.515, + "args": { + "External id": 987492,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940424516.470, "dur": 0.743, + "args": { + "External id": 987493,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940424519.939, "dur": 31.921, + "args": { + "External id": 987494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940424521.281, "dur": 1.438, + "args": { + "External id": 987495,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940424524.255, "dur": 27.213, + "args": { + "External id": 987496,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940424530.150, "dur": 4.429, + "args": { + "External id": 987497,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940424553.831, "dur": 26.997, + "args": { + "External id": 987498,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940424584.380, "dur": 16.871, + "args": { + "External id": 987499,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940424605.901, "dur": 18.787, + "args": { + "External id": 987500,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940424627.584, "dur": 14.898, + "args": { + "External id": 987501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940424644.379, "dur": 27.323, + "args": { + "External id": 987502,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940424648.937, "dur": 2.094, + "args": { + "External id": 987503,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940424654.182, "dur": 0.588, + "args": { + "External id": 987504,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940424674.311, "dur": 14.044, + "args": { + "External id": 987505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940424689.699, "dur": 14.123, + "args": { + "External id": 987506,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940424715.703, "dur": 2.396, + "args": { + "External id": 987507,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940424725.704, "dur": 5.418, + "args": { + "External id": 987508,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940424729.543, "dur": 0.442, + "args": { + "External id": 987509,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940424818.765, "dur": 77.540, + "args": { + "External id": 987510,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940424904.798, "dur": 9.035, + "args": { + "External id": 987511,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940424909.003, "dur": 0.923, + "args": { + "External id": 987512,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940424915.591, "dur": 30.876, + "args": { + "External id": 987513,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940424953.454, "dur": 10.797, + "args": { + "External id": 987514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940424956.583, "dur": 5.826, + "args": { + "External id": 987515,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940424961.221, "dur": 0.943, + "args": { + "External id": 987516,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940424967.410, "dur": 72.883, + "args": { + "External id": 987517,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940424968.562, "dur": 70.264, + "args": { + "External id": 987518,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940425048.537, "dur": 59.952, + "args": { + "External id": 987519,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940425120.177, "dur": 8.045, + "args": { + "External id": 987520,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940425125.131, "dur": 0.778, + "args": { + "External id": 987521,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940425134.809, "dur": 74.121, + "args": { + "External id": 987522,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940425139.130, "dur": 9.040, + "args": { + "External id": 987523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940425140.604, "dur": 6.775, + "args": { + "External id": 987524,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940425142.745, "dur": 4.420, + "args": { + "External id": 987525,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940425149.059, "dur": 59.411, + "args": { + "External id": 987526,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940425151.263, "dur": 56.367, + "args": { + "External id": 987527,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940425217.640, "dur": 5.410, + "args": { + "External id": 987528,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940425220.939, "dur": 0.404, + "args": { + "External id": 987529,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940425235.430, "dur": 1.703, + "args": { + "External id": 987530,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940425248.457, "dur": 7.947, + "args": { + "External id": 987531,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940425251.659, "dur": 4.451, + "args": { + "External id": 987532,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940425375.859, "dur": 227.659, + "args": { + "External id": 987533,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940425378.837, "dur": 3.640, + "args": { + "External id": 987534,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940425386.049, "dur": 216.930, + "args": { + "External id": 987535,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940425387.633, "dur": 0.611, + "args": { + "External id": 987536,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940425389.786, "dur": 27.695, + "args": { + "External id": 987537,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940425419.426, "dur": 4.221, + "args": { + "External id": 987538,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940425422.340, "dur": 0.988, + "args": { + "External id": 987539,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940425424.808, "dur": 29.003, + "args": { + "External id": 987540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940425426.210, "dur": 1.784, + "args": { + "External id": 987541,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940425429.637, "dur": 23.682, + "args": { + "External id": 987542,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940425433.396, "dur": 2.940, + "args": { + "External id": 987543,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940425455.796, "dur": 28.288, + "args": { + "External id": 987544,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940425485.885, "dur": 18.997, + "args": { + "External id": 987545,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940425508.089, "dur": 17.705, + "args": { + "External id": 987546,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940425527.403, "dur": 18.195, + "args": { + "External id": 987547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940425547.687, "dur": 23.377, + "args": { + "External id": 987548,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940425550.488, "dur": 2.331, + "args": { + "External id": 987549,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940425554.703, "dur": 0.967, + "args": { + "External id": 987550,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940425572.651, "dur": 14.938, + "args": { + "External id": 987551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940425588.655, "dur": 13.033, + "args": { + "External id": 987552,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940425610.299, "dur": 1.840, + "args": { + "External id": 987553,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940425622.312, "dur": 4.992, + "args": { + "External id": 987554,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940425625.748, "dur": 0.603, + "args": { + "External id": 987555,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940425703.834, "dur": 61.112, + "args": { + "External id": 987556,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940425770.769, "dur": 5.714, + "args": { + "External id": 987557,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940425774.372, "dur": 0.760, + "args": { + "External id": 987558,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940425778.018, "dur": 30.738, + "args": { + "External id": 987559,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940425814.625, "dur": 6.835, + "args": { + "External id": 987560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940425816.625, "dur": 4.131, + "args": { + "External id": 987561,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940425818.640, "dur": 1.908, + "args": { + "External id": 987562,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940425824.408, "dur": 47.417, + "args": { + "External id": 987563,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940425825.367, "dur": 45.518, + "args": { + "External id": 987564,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940425876.379, "dur": 16.340, + "args": { + "External id": 987565,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940425898.865, "dur": 6.695, + "args": { + "External id": 987566,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940425903.514, "dur": 0.978, + "args": { + "External id": 987567,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940425909.483, "dur": 50.053, + "args": { + "External id": 987568,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940425910.467, "dur": 5.326, + "args": { + "External id": 987569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940425911.113, "dur": 3.991, + "args": { + "External id": 987570,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940425914.438, "dur": 0.506, + "args": { + "External id": 987571,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940425916.409, "dur": 42.745, + "args": { + "External id": 987572,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940425917.115, "dur": 41.256, + "args": { + "External id": 987573,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940425964.582, "dur": 4.518, + "args": { + "External id": 987574,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940425967.140, "dur": 0.609, + "args": { + "External id": 987575,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940425975.236, "dur": 1.588, + "args": { + "External id": 987576,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940425985.212, "dur": 12.577, + "args": { + "External id": 987577,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940425990.666, "dur": 6.759, + "args": { + "External id": 987578,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940426169.260, "dur": 272.563, + "args": { + "External id": 987579,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940426173.319, "dur": 3.551, + "args": { + "External id": 987580,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940426179.100, "dur": 262.268, + "args": { + "External id": 987581,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940426180.660, "dur": 0.465, + "args": { + "External id": 987582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940426182.860, "dur": 26.593, + "args": { + "External id": 987583,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940426211.620, "dur": 3.743, + "args": { + "External id": 987584,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940426214.073, "dur": 0.895, + "args": { + "External id": 987585,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940426216.693, "dur": 31.693, + "args": { + "External id": 987586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940426218.561, "dur": 1.695, + "args": { + "External id": 987587,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940426222.089, "dur": 25.839, + "args": { + "External id": 987588,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940426225.373, "dur": 3.835, + "args": { + "External id": 987589,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940426250.526, "dur": 33.741, + "args": { + "External id": 987590,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940426286.033, "dur": 40.552, + "args": { + "External id": 987591,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940426331.081, "dur": 33.699, + "args": { + "External id": 987592,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940426366.620, "dur": 14.908, + "args": { + "External id": 987593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940426384.067, "dur": 25.531, + "args": { + "External id": 987594,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940426386.780, "dur": 1.452, + "args": { + "External id": 987595,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940426390.190, "dur": 0.893, + "args": { + "External id": 987596,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940426411.168, "dur": 13.100, + "args": { + "External id": 987597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940426425.343, "dur": 14.999, + "args": { + "External id": 987598,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940426450.427, "dur": 2.302, + "args": { + "External id": 987599,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940426463.984, "dur": 5.476, + "args": { + "External id": 987600,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940426466.815, "dur": 0.570, + "args": { + "External id": 987601,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940426547.950, "dur": 74.160, + "args": { + "External id": 987602,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940426627.889, "dur": 5.425, + "args": { + "External id": 987603,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940426631.195, "dur": 0.859, + "args": { + "External id": 987604,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940426635.439, "dur": 33.393, + "args": { + "External id": 987605,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940426674.074, "dur": 5.841, + "args": { + "External id": 987606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940426676.080, "dur": 2.910, + "args": { + "External id": 987607,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940426677.988, "dur": 0.784, + "args": { + "External id": 987608,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940426683.439, "dur": 48.734, + "args": { + "External id": 987609,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940426684.688, "dur": 46.815, + "args": { + "External id": 987610,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940426737.099, "dur": 18.862, + "args": { + "External id": 987611,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940426762.881, "dur": 4.531, + "args": { + "External id": 987612,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940426765.519, "dur": 0.585, + "args": { + "External id": 987613,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940426772.153, "dur": 53.061, + "args": { + "External id": 987614,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940426773.264, "dur": 4.340, + "args": { + "External id": 987615,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940426774.195, "dur": 2.782, + "args": { + "External id": 987616,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940426776.414, "dur": 0.392, + "args": { + "External id": 987617,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940426778.280, "dur": 46.468, + "args": { + "External id": 987618,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940426778.785, "dur": 45.115, + "args": { + "External id": 987619,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940426830.180, "dur": 4.640, + "args": { + "External id": 987620,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940426832.756, "dur": 0.544, + "args": { + "External id": 987621,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940426843.437, "dur": 1.490, + "args": { + "External id": 987622,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940426853.955, "dur": 8.560, + "args": { + "External id": 987623,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940426856.182, "dur": 5.981, + "args": { + "External id": 987624,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940426960.748, "dur": 282.926, + "args": { + "External id": 987625,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940426966.875, "dur": 2.201, + "args": { + "External id": 987626,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940426970.715, "dur": 272.369, + "args": { + "External id": 987627,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940426974.241, "dur": 0.317, + "args": { + "External id": 987628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940426975.986, "dur": 25.238, + "args": { + "External id": 987629,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940427003.727, "dur": 23.534, + "args": { + "External id": 987630,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940427006.449, "dur": 19.920, + "args": { + "External id": 987631,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940427030.628, "dur": 69.181, + "args": { + "External id": 987632,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940427033.124, "dur": 2.340, + "args": { + "External id": 987633,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940427037.031, "dur": 61.791, + "args": { + "External id": 987634,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940427040.759, "dur": 2.816, + "args": { + "External id": 987635,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940427102.588, "dur": 27.214, + "args": { + "External id": 987636,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940427132.048, "dur": 16.166, + "args": { + "External id": 987637,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940427151.509, "dur": 15.561, + "args": { + "External id": 987638,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940427168.721, "dur": 15.550, + "args": { + "External id": 987639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940427186.779, "dur": 24.815, + "args": { + "External id": 987640,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940427189.758, "dur": 2.225, + "args": { + "External id": 987641,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940427194.591, "dur": 0.912, + "args": { + "External id": 987642,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940427213.370, "dur": 14.142, + "args": { + "External id": 987643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940427228.799, "dur": 13.110, + "args": { + "External id": 987644,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940427253.752, "dur": 2.458, + "args": { + "External id": 987645,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940427267.883, "dur": 5.022, + "args": { + "External id": 987646,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940427270.972, "dur": 0.854, + "args": { + "External id": 987647,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940427354.520, "dur": 65.799, + "args": { + "External id": 987648,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940427447.024, "dur": 6.064, + "args": { + "External id": 987649,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940427451.042, "dur": 0.683, + "args": { + "External id": 987650,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940427454.601, "dur": 28.091, + "args": { + "External id": 987651,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940427488.012, "dur": 7.417, + "args": { + "External id": 987652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940427489.641, "dur": 5.012, + "args": { + "External id": 987653,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940427493.624, "dur": 0.780, + "args": { + "External id": 987654,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940427498.973, "dur": 47.437, + "args": { + "External id": 987655,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940427500.078, "dur": 45.378, + "args": { + "External id": 987656,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940427551.303, "dur": 17.717, + "args": { + "External id": 987657,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940427575.435, "dur": 4.429, + "args": { + "External id": 987658,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940427577.963, "dur": 0.596, + "args": { + "External id": 987659,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940427587.284, "dur": 50.910, + "args": { + "External id": 987660,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940427588.330, "dur": 3.865, + "args": { + "External id": 987661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940427589.016, "dur": 2.478, + "args": { + "External id": 987662,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940427590.795, "dur": 0.536, + "args": { + "External id": 987663,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940427592.827, "dur": 44.935, + "args": { + "External id": 987664,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940427593.384, "dur": 43.591, + "args": { + "External id": 987665,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940427642.981, "dur": 4.098, + "args": { + "External id": 987666,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940427645.299, "dur": 0.355, + "args": { + "External id": 987667,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940427656.455, "dur": 1.534, + "args": { + "External id": 987668,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940427667.362, "dur": 7.612, + "args": { + "External id": 987669,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940427670.024, "dur": 4.656, + "args": { + "External id": 987670,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940427771.331, "dur": 201.033, + "args": { + "External id": 987671,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940427773.822, "dur": 2.396, + "args": { + "External id": 987672,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940427777.690, "dur": 194.139, + "args": { + "External id": 987673,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940427781.284, "dur": 0.467, + "args": { + "External id": 987674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940427783.559, "dur": 23.398, + "args": { + "External id": 987675,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940427808.785, "dur": 4.143, + "args": { + "External id": 987676,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940427811.816, "dur": 0.781, + "args": { + "External id": 987677,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940427813.928, "dur": 24.183, + "args": { + "External id": 987678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940427815.291, "dur": 1.515, + "args": { + "External id": 987679,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940427818.270, "dur": 19.504, + "args": { + "External id": 987680,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940427821.360, "dur": 2.984, + "args": { + "External id": 987681,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940427839.376, "dur": 23.866, + "args": { + "External id": 987682,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940427864.917, "dur": 15.472, + "args": { + "External id": 987683,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940427883.613, "dur": 16.287, + "args": { + "External id": 987684,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940427901.313, "dur": 19.320, + "args": { + "External id": 987685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940427922.641, "dur": 22.011, + "args": { + "External id": 987686,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940427924.736, "dur": 1.487, + "args": { + "External id": 987687,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940427928.020, "dur": 0.817, + "args": { + "External id": 987688,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940427946.319, "dur": 12.091, + "args": { + "External id": 987689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940427959.468, "dur": 11.238, + "args": { + "External id": 987690,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940427979.454, "dur": 1.571, + "args": { + "External id": 987691,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940427991.022, "dur": 3.927, + "args": { + "External id": 987692,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940427993.541, "dur": 0.438, + "args": { + "External id": 987693,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940428135.625, "dur": 69.334, + "args": { + "External id": 987694,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940428213.634, "dur": 8.013, + "args": { + "External id": 987695,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940428218.708, "dur": 1.012, + "args": { + "External id": 987696,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940428223.151, "dur": 30.706, + "args": { + "External id": 987697,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940428259.341, "dur": 6.045, + "args": { + "External id": 987698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940428261.352, "dur": 3.244, + "args": { + "External id": 987699,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940428263.326, "dur": 1.006, + "args": { + "External id": 987700,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940428269.648, "dur": 45.754, + "args": { + "External id": 987701,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940428270.979, "dur": 43.663, + "args": { + "External id": 987702,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940428320.295, "dur": 16.462, + "args": { + "External id": 987703,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940428343.658, "dur": 4.348, + "args": { + "External id": 987704,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940428346.339, "dur": 0.490, + "args": { + "External id": 987705,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940428352.500, "dur": 50.945, + "args": { + "External id": 987706,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940428353.797, "dur": 5.983, + "args": { + "External id": 987707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940428356.575, "dur": 2.570, + "args": { + "External id": 987708,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940428358.145, "dur": 0.820, + "args": { + "External id": 987709,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940428360.406, "dur": 42.667, + "args": { + "External id": 987710,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940428361.249, "dur": 41.077, + "args": { + "External id": 987711,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940428407.504, "dur": 4.822, + "args": { + "External id": 987712,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940428410.139, "dur": 0.693, + "args": { + "External id": 987713,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940428421.052, "dur": 2.066, + "args": { + "External id": 987714,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940428432.480, "dur": 9.116, + "args": { + "External id": 987715,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940428434.713, "dur": 6.592, + "args": { + "External id": 987716,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940428534.388, "dur": 212.419, + "args": { + "External id": 987717,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940428537.163, "dur": 2.257, + "args": { + "External id": 987718,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940428545.359, "dur": 200.981, + "args": { + "External id": 987719,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940428549.248, "dur": 0.553, + "args": { + "External id": 987720,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940428553.302, "dur": 25.365, + "args": { + "External id": 987721,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940428580.454, "dur": 4.180, + "args": { + "External id": 987722,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940428583.304, "dur": 1.016, + "args": { + "External id": 987723,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940428585.583, "dur": 25.888, + "args": { + "External id": 987724,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940428587.410, "dur": 1.437, + "args": { + "External id": 987725,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940428590.777, "dur": 20.376, + "args": { + "External id": 987726,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940428594.070, "dur": 2.627, + "args": { + "External id": 987727,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940428613.265, "dur": 23.032, + "args": { + "External id": 987728,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940428637.697, "dur": 16.501, + "args": { + "External id": 987729,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940428656.784, "dur": 15.505, + "args": { + "External id": 987730,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940428673.943, "dur": 15.500, + "args": { + "External id": 987731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940428691.713, "dur": 23.124, + "args": { + "External id": 987732,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940428693.813, "dur": 1.696, + "args": { + "External id": 987733,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940428698.152, "dur": 0.641, + "args": { + "External id": 987734,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940428716.868, "dur": 14.509, + "args": { + "External id": 987735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940428732.630, "dur": 12.479, + "args": { + "External id": 987736,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940428753.517, "dur": 1.605, + "args": { + "External id": 987737,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940428765.141, "dur": 3.837, + "args": { + "External id": 987738,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940428767.659, "dur": 0.390, + "args": { + "External id": 987739,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940428838.431, "dur": 54.059, + "args": { + "External id": 987740,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940428899.982, "dur": 4.986, + "args": { + "External id": 987741,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940428902.944, "dur": 0.795, + "args": { + "External id": 987742,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940428906.460, "dur": 24.869, + "args": { + "External id": 987743,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940428935.826, "dur": 5.253, + "args": { + "External id": 987744,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940428937.405, "dur": 2.802, + "args": { + "External id": 987745,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940428939.378, "dur": 0.633, + "args": { + "External id": 987746,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940428945.229, "dur": 42.903, + "args": { + "External id": 987747,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940428946.327, "dur": 41.007, + "args": { + "External id": 987748,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940428991.969, "dur": 35.014, + "args": { + "External id": 987749,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940429035.355, "dur": 5.055, + "args": { + "External id": 987750,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940429038.160, "dur": 0.880, + "args": { + "External id": 987751,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940429045.364, "dur": 100.574, + "args": { + "External id": 987752,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940429046.405, "dur": 45.622, + "args": { + "External id": 987753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940429050.486, "dur": 40.501, + "args": { + "External id": 987754,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940429089.368, "dur": 1.105, + "args": { + "External id": 987755,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940429093.282, "dur": 52.171, + "args": { + "External id": 987756,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940429094.176, "dur": 50.397, + "args": { + "External id": 987757,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940429151.694, "dur": 7.703, + "args": { + "External id": 987758,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940429154.625, "dur": 3.256, + "args": { + "External id": 987759,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940429167.232, "dur": 1.904, + "args": { + "External id": 987760,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940429179.889, "dur": 7.408, + "args": { + "External id": 987761,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940429182.496, "dur": 4.528, + "args": { + "External id": 987762,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940429288.036, "dur": 222.796, + "args": { + "External id": 987763,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940429290.980, "dur": 2.742, + "args": { + "External id": 987764,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940429295.195, "dur": 215.081, + "args": { + "External id": 987765,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940429296.683, "dur": 0.732, + "args": { + "External id": 987766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940429299.075, "dur": 28.086, + "args": { + "External id": 987767,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940429328.777, "dur": 4.315, + "args": { + "External id": 987768,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940429331.808, "dur": 0.969, + "args": { + "External id": 987769,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940429334.409, "dur": 30.047, + "args": { + "External id": 987770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940429335.795, "dur": 1.414, + "args": { + "External id": 987771,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940429339.111, "dur": 24.905, + "args": { + "External id": 987772,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940429344.629, "dur": 3.875, + "args": { + "External id": 987773,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940429366.077, "dur": 25.069, + "args": { + "External id": 987774,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940429393.235, "dur": 15.714, + "args": { + "External id": 987775,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940429412.197, "dur": 15.619, + "args": { + "External id": 987776,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940429429.437, "dur": 16.603, + "args": { + "External id": 987777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940429448.341, "dur": 24.136, + "args": { + "External id": 987778,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940429451.235, "dur": 1.675, + "args": { + "External id": 987779,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940429454.948, "dur": 0.943, + "args": { + "External id": 987780,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940429478.107, "dur": 15.193, + "args": { + "External id": 987781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940429494.985, "dur": 14.162, + "args": { + "External id": 987782,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940429518.329, "dur": 1.887, + "args": { + "External id": 987783,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940429530.485, "dur": 4.991, + "args": { + "External id": 987784,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940429533.810, "dur": 0.683, + "args": { + "External id": 987785,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940429613.135, "dur": 64.319, + "args": { + "External id": 987786,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940429683.557, "dur": 5.992, + "args": { + "External id": 987787,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940429687.332, "dur": 0.969, + "args": { + "External id": 987788,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940429691.497, "dur": 31.278, + "args": { + "External id": 987789,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940429727.949, "dur": 7.819, + "args": { + "External id": 987790,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940429729.656, "dur": 5.254, + "args": { + "External id": 987791,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940429733.790, "dur": 0.872, + "args": { + "External id": 987792,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940429739.010, "dur": 47.648, + "args": { + "External id": 987793,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940429740.253, "dur": 45.657, + "args": { + "External id": 987794,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940429791.325, "dur": 17.943, + "args": { + "External id": 987795,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940429816.000, "dur": 4.127, + "args": { + "External id": 987796,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940429818.539, "dur": 0.610, + "args": { + "External id": 987797,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338706, "tid": 2338706, + "ts": 6345940429826.691, "dur": 50.747, + "args": { + "External id": 987798,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940429828.007, "dur": 3.830, + "args": { + "External id": 987799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940429828.963, "dur": 2.213, + "args": { + "External id": 987800,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940429830.495, "dur": 0.516, + "args": { + "External id": 987801,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940429832.480, "dur": 44.522, + "args": { + "External id": 987802,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940429833.110, "dur": 43.166, + "args": { + "External id": 987803,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940429883.563, "dur": 3.972, + "args": { + "External id": 987804,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940429885.681, "dur": 0.606, + "args": { + "External id": 987805,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940429893.620, "dur": 1.577, + "args": { + "External id": 987806,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940429903.265, "dur": 7.713, + "args": { + "External id": 987807,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940429905.810, "dur": 4.874, + "args": { + "External id": 987808,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940430005.311, "dur": 283.266, + "args": { + "External id": 987809,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940430027.235, "dur": 6.063, + "args": { + "External id": 987810,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338706, "tid": 2338706, + "ts": 6345940430035.118, "dur": 252.827, + "args": { + "External id": 987811,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338706, "tid": 2338706, + "ts": 6345940430037.171, "dur": 0.439, + "args": { + "External id": 987812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338706, "tid": 2338706, + "ts": 6345940430039.541, "dur": 69.198, + "args": { + "External id": 987813,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338706, "tid": 2338706, + "ts": 6345940430112.652, "dur": 4.157, + "args": { + "External id": 987814,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940430115.460, "dur": 1.024, + "args": { + "External id": 987815,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940430117.934, "dur": 33.074, + "args": { + "External id": 987816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345940430119.686, "dur": 2.101, + "args": { + "External id": 987817,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345940430123.422, "dur": 27.093, + "args": { + "External id": 987818,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940430127.342, "dur": 3.150, + "args": { + "External id": 987819,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345940430152.508, "dur": 23.257, + "args": { + "External id": 987820,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940430177.350, "dur": 17.591, + "args": { + "External id": 987821,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338706, "tid": 2338706, + "ts": 6345940430198.192, "dur": 15.401, + "args": { + "External id": 987822,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338706, "tid": 2338706, + "ts": 6345940430214.992, "dur": 15.477, + "args": { + "External id": 987823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940430232.471, "dur": 23.541, + "args": { + "External id": 987824,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345940430235.075, "dur": 1.789, + "args": { + "External id": 987825,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940430238.742, "dur": 0.628, + "args": { + "External id": 987826,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338706, "tid": 2338706, + "ts": 6345940430258.169, "dur": 14.420, + "args": { + "External id": 987827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940430273.530, "dur": 13.262, + "args": { + "External id": 987828,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345940430298.116, "dur": 2.905, + "args": { + "External id": 987829,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940430312.179, "dur": 4.402, + "args": { + "External id": 987830,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940430314.930, "dur": 0.636, + "args": { + "External id": 987831,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940430397.630, "dur": 68.311, + "args": { + "External id": 987832,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338706, "tid": 2338706, + "ts": 6345940430471.823, "dur": 6.098, + "args": { + "External id": 987833,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940430475.853, "dur": 0.826, + "args": { + "External id": 987834,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940430479.727, "dur": 30.278, + "args": { + "External id": 987835,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338706, "tid": 2338706, + "ts": 6345940430516.959, "dur": 5.211, + "args": { + "External id": 987836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338706, "tid": 2338706, + "ts": 6345940430518.748, "dur": 2.782, + "args": { + "External id": 987837,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940430520.604, "dur": 0.711, + "args": { + "External id": 987838,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338706, "tid": 2338706, + "ts": 6345940430525.367, "dur": 48.253, + "args": { + "External id": 987839,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338706, "tid": 2338706, + "ts": 6345940430526.461, "dur": 46.245, + "args": { + "External id": 987840,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940430577.983, "dur": 18.337, + "args": { + "External id": 987841,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940430601.048, "dur": 30.709, + "args": { + "External id": 987842,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338706, "tid": 2338706, + "ts": 6345940430606.676, "dur": 24.562, + "args": { + "External id": 987843,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940430612.263, "dur": 0.843, + "args": { + "External id": 987844,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 20744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345940430637.660, "dur": 30.078, + "args": { + "External id": 987845,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338706, "tid": 2338706, + "ts": 6345940430639.742, "dur": 27.744, + "args": { + "External id": 987846,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 20746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940430644.614, "dur": 3.756, + "args": { + "External id": 987847,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345940430649.756, "dur": 17.074, + "args": { + "External id": 987848,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2338706, + "ts": 6345940430680.399, "dur": 6.601, + "args": { + "External id": 987849,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2338706, + "ts": 6345940430682.706, "dur": 3.933, + "args": { + "External id": 987850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338706, "tid": 2338706, + "ts": 6345940430688.376, "dur": 3.784, + "args": { + "External id": 987851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338706, "tid": 2338706, + "ts": 6345940430691.453, "dur": 0.608, + "args": { + "External id": 987852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940430740.417, "dur": 25.650, + "args": { + "External id": 987853,"Sequence number": 10552695, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338706, "tid": 2338706, + "ts": 6345940430768.297, "dur": 17.723, + "args": { + "External id": 987854,"Sequence number": 10552696, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20754 + } + }, + { + "ph": "s", "id": 226, "pid": 2338706, "tid": 2338706, "ts": 6345940430768.297, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 2338706, "tid": 2338706, + "ts": 6345940430911.731, "dur": 48.543, + "args": { + "External id": 987855,"Record function id": 0, "Ev Idx": 20755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338706, "tid": 2338706, + "ts": 6345940431157.033, "dur": 50.935, + "args": { + "External id": 987856,"Sequence number": 10552697, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 20756 + } + }, + { + "ph": "s", "id": 225, "pid": 2338706, "tid": 2338706, "ts": 6345940431157.033, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940431245.559, "dur": 33.287, + "args": { + "External id": 987857,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 20757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345940431247.373, "dur": 10.622, + "args": { + "External id": 987858,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 20758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345940431252.573, "dur": 4.753, + "args": { + "External id": 987859,"Record function id": 0, "Concrete Inputs": ["[1]", "[1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345940431259.759, "dur": 18.711, + "args": { + "External id": 987860,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 20760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338706, "tid": 2338706, + "ts": 6345942512912.404, "dur": 78.975, + "args": { + "External id": 987861,"Sequence number": 10552698, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 20761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338706, "tid": 2338706, + "ts": 6345942513001.084, "dur": 46.148, + "args": { + "External id": 987862,"Sequence number": 10552699, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 20762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2338706, + "ts": 6345942513101.446, "dur": 35.129, + "args": { + "External id": 987863,"Sequence number": 10552700, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2338706, + "ts": 6345942513140.788, "dur": 202.607, + "args": { + "External id": 987864,"Sequence number": 10552701, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[1], [1], []], "Ev Idx": 20764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2338706, + "ts": 6345942514627.004, "dur": 50.192, + "args": { + "External id": 987865,"Sequence number": 10552702, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2338706, + "ts": 6345942514681.266, "dur": 17.865, + "args": { + "External id": 987866,"Sequence number": 10552703, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2338706, + "ts": 6345942514711.560, "dur": 17.364, + "args": { + "External id": 987867,"Sequence number": 10552704, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2338706, + "ts": 6345942514731.159, "dur": 16.148, + "args": { + "External id": 987868,"Sequence number": 10552705, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[1], [1], []], "Ev Idx": 20768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2338706, "tid": 2338706, + "ts": 6345942517760.424, "dur": 4012.393, + "args": { + "External id": 987869,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2338706, "tid": 2338706, + "ts": 6345942518468.313, "dur": 1615.763, + "args": { + "External id": 987870,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338706, "tid": 2338706, + "ts": 6345942518492.197, "dur": 89.278, + "args": { + "External id": 987871,"Record function id": 0, "Concrete Inputs": ["[68250]", "6", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 20771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345942518497.355, "dur": 19.967, + "args": { + "External id": 987872,"Record function id": 0, "Concrete Inputs": ["[68250]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338706, "tid": 2338706, + "ts": 6345942518522.477, "dur": 58.544, + "args": { + "External id": 987873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[68250]], "Ev Idx": 20773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338706, "tid": 2338706, + "ts": 6345942518527.304, "dur": 52.938, + "args": { + "External id": 987874,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[68250], []], "Ev Idx": 20774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521813.307, "dur": 5.003, + "args": { + "External id": 987875,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521820.681, "dur": 0.653, + "args": { + "External id": 987876,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521822.972, "dur": 1.363, + "args": { + "External id": 987877,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521825.641, "dur": 0.282, + "args": { + "External id": 987878,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521827.011, "dur": 0.360, + "args": { + "External id": 987879,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521828.559, "dur": 0.340, + "args": { + "External id": 987880,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521830.060, "dur": 0.479, + "args": { + "External id": 987881,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521833.983, "dur": 0.320, + "args": { + "External id": 987882,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521835.802, "dur": 0.261, + "args": { + "External id": 987883,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521837.267, "dur": 0.345, + "args": { + "External id": 987884,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521838.601, "dur": 0.269, + "args": { + "External id": 987885,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521840.128, "dur": 0.265, + "args": { + "External id": 987886,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521841.434, "dur": 0.247, + "args": { + "External id": 987887,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521842.902, "dur": 0.268, + "args": { + "External id": 987888,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521844.293, "dur": 0.246, + "args": { + "External id": 987889,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521847.273, "dur": 0.248, + "args": { + "External id": 987890,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521848.668, "dur": 0.247, + "args": { + "External id": 987891,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521850.059, "dur": 0.247, + "args": { + "External id": 987892,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521851.318, "dur": 0.245, + "args": { + "External id": 987893,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521852.559, "dur": 0.249, + "args": { + "External id": 987894,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521853.954, "dur": 0.258, + "args": { + "External id": 987895,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521855.282, "dur": 0.243, + "args": { + "External id": 987896,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521856.819, "dur": 0.263, + "args": { + "External id": 987897,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521859.845, "dur": 0.340, + "args": { + "External id": 987898,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521861.234, "dur": 0.250, + "args": { + "External id": 987899,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521862.725, "dur": 0.243, + "args": { + "External id": 987900,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521864.241, "dur": 0.244, + "args": { + "External id": 987901,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521865.562, "dur": 0.244, + "args": { + "External id": 987902,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521867.180, "dur": 0.499, + "args": { + "External id": 987903,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521868.745, "dur": 0.320, + "args": { + "External id": 987904,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521870.068, "dur": 0.331, + "args": { + "External id": 987905,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521873.749, "dur": 0.255, + "args": { + "External id": 987906,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521875.068, "dur": 0.244, + "args": { + "External id": 987907,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521876.670, "dur": 0.297, + "args": { + "External id": 987908,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521878.152, "dur": 0.242, + "args": { + "External id": 987909,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521879.361, "dur": 0.244, + "args": { + "External id": 987910,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521880.812, "dur": 0.260, + "args": { + "External id": 987911,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521882.103, "dur": 0.272, + "args": { + "External id": 987912,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521883.518, "dur": 0.254, + "args": { + "External id": 987913,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521886.828, "dur": 0.242, + "args": { + "External id": 987914,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521888.277, "dur": 0.242, + "args": { + "External id": 987915,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521889.672, "dur": 0.241, + "args": { + "External id": 987916,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521890.941, "dur": 0.244, + "args": { + "External id": 987917,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521892.279, "dur": 0.242, + "args": { + "External id": 987918,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521893.515, "dur": 0.249, + "args": { + "External id": 987919,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521894.759, "dur": 0.269, + "args": { + "External id": 987920,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521896.111, "dur": 0.275, + "args": { + "External id": 987921,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521899.465, "dur": 0.268, + "args": { + "External id": 987922,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521900.819, "dur": 0.242, + "args": { + "External id": 987923,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521902.333, "dur": 0.243, + "args": { + "External id": 987924,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521903.767, "dur": 0.249, + "args": { + "External id": 987925,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521906.159, "dur": 0.262, + "args": { + "External id": 987926,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521907.459, "dur": 0.253, + "args": { + "External id": 987927,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521908.890, "dur": 0.369, + "args": { + "External id": 987928,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521910.228, "dur": 0.391, + "args": { + "External id": 987929,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521914.110, "dur": 0.242, + "args": { + "External id": 987930,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521915.373, "dur": 0.241, + "args": { + "External id": 987931,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521916.723, "dur": 0.301, + "args": { + "External id": 987932,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521918.069, "dur": 0.324, + "args": { + "External id": 987933,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521919.369, "dur": 0.334, + "args": { + "External id": 987934,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521920.705, "dur": 0.354, + "args": { + "External id": 987935,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521922.303, "dur": 0.330, + "args": { + "External id": 987936,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521923.634, "dur": 0.345, + "args": { + "External id": 987937,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521927.097, "dur": 0.242, + "args": { + "External id": 987938,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521928.323, "dur": 0.242, + "args": { + "External id": 987939,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521929.743, "dur": 0.247, + "args": { + "External id": 987940,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521931.134, "dur": 0.244, + "args": { + "External id": 987941,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521932.528, "dur": 0.266, + "args": { + "External id": 987942,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521933.990, "dur": 0.245, + "args": { + "External id": 987943,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521935.234, "dur": 0.245, + "args": { + "External id": 987944,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521936.586, "dur": 0.465, + "args": { + "External id": 987945,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521939.377, "dur": 0.466, + "args": { + "External id": 987946,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521940.827, "dur": 0.640, + "args": { + "External id": 987947,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521942.436, "dur": 0.556, + "args": { + "External id": 987948,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521943.963, "dur": 0.242, + "args": { + "External id": 987949,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521945.326, "dur": 0.267, + "args": { + "External id": 987950,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521946.707, "dur": 0.310, + "args": { + "External id": 987951,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521948.888, "dur": 0.329, + "args": { + "External id": 987952,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521950.266, "dur": 0.242, + "args": { + "External id": 987953,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521953.846, "dur": 0.246, + "args": { + "External id": 987954,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521955.119, "dur": 0.248, + "args": { + "External id": 987955,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521956.385, "dur": 0.242, + "args": { + "External id": 987956,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521957.652, "dur": 0.245, + "args": { + "External id": 987957,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521958.968, "dur": 0.269, + "args": { + "External id": 987958,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521960.416, "dur": 0.350, + "args": { + "External id": 987959,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521962.016, "dur": 0.344, + "args": { + "External id": 987960,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521963.439, "dur": 0.454, + "args": { + "External id": 987961,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521966.875, "dur": 0.253, + "args": { + "External id": 987962,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521968.126, "dur": 0.428, + "args": { + "External id": 987963,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521969.672, "dur": 0.454, + "args": { + "External id": 987964,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521971.212, "dur": 0.245, + "args": { + "External id": 987965,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521972.456, "dur": 0.348, + "args": { + "External id": 987966,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521973.781, "dur": 0.244, + "args": { + "External id": 987967,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521975.744, "dur": 0.322, + "args": { + "External id": 987968,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521977.168, "dur": 0.243, + "args": { + "External id": 987969,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521980.341, "dur": 0.247, + "args": { + "External id": 987970,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521981.740, "dur": 0.242, + "args": { + "External id": 987971,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521982.972, "dur": 0.261, + "args": { + "External id": 987972,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521984.216, "dur": 0.243, + "args": { + "External id": 987973,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521985.490, "dur": 0.265, + "args": { + "External id": 987974,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521986.914, "dur": 0.244, + "args": { + "External id": 987975,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521988.148, "dur": 0.243, + "args": { + "External id": 987976,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521989.443, "dur": 0.280, + "args": { + "External id": 987977,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521992.919, "dur": 0.244, + "args": { + "External id": 987978,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521994.340, "dur": 0.274, + "args": { + "External id": 987979,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521995.613, "dur": 0.241, + "args": { + "External id": 987980,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521996.872, "dur": 0.241, + "args": { + "External id": 987981,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521998.173, "dur": 0.266, + "args": { + "External id": 987982,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942521999.454, "dur": 0.243, + "args": { + "External id": 987983,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522001.353, "dur": 0.246, + "args": { + "External id": 987984,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522002.694, "dur": 0.245, + "args": { + "External id": 987985,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522005.706, "dur": 0.249, + "args": { + "External id": 987986,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522006.980, "dur": 14.428, + "args": { + "External id": 987987,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522026.649, "dur": 0.343, + "args": { + "External id": 987988,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522028.079, "dur": 0.254, + "args": { + "External id": 987989,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522029.473, "dur": 0.251, + "args": { + "External id": 987990,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522030.788, "dur": 0.251, + "args": { + "External id": 987991,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522032.199, "dur": 0.274, + "args": { + "External id": 987992,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522033.666, "dur": 0.333, + "args": { + "External id": 987993,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522036.513, "dur": 0.344, + "args": { + "External id": 987994,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522038.105, "dur": 0.249, + "args": { + "External id": 987995,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522039.413, "dur": 0.288, + "args": { + "External id": 987996,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522041.001, "dur": 0.272, + "args": { + "External id": 987997,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522042.231, "dur": 0.428, + "args": { + "External id": 987998,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522043.628, "dur": 0.511, + "args": { + "External id": 987999,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522045.179, "dur": 0.384, + "args": { + "External id": 988000,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522046.541, "dur": 0.488, + "args": { + "External id": 988001,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522049.406, "dur": 0.473, + "args": { + "External id": 988002,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522051.098, "dur": 0.462, + "args": { + "External id": 988003,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522082.547, "dur": 0.782, + "args": { + "External id": 988004,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522087.446, "dur": 0.257, + "args": { + "External id": 988005,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522088.781, "dur": 0.398, + "args": { + "External id": 988006,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522090.429, "dur": 0.371, + "args": { + "External id": 988007,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522091.779, "dur": 0.354, + "args": { + "External id": 988008,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522093.245, "dur": 0.383, + "args": { + "External id": 988009,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522096.665, "dur": 0.452, + "args": { + "External id": 988010,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522098.313, "dur": 0.495, + "args": { + "External id": 988011,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522099.811, "dur": 0.354, + "args": { + "External id": 988012,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522101.315, "dur": 0.255, + "args": { + "External id": 988013,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522102.551, "dur": 0.368, + "args": { + "External id": 988014,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522103.873, "dur": 0.463, + "args": { + "External id": 988015,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522105.270, "dur": 0.362, + "args": { + "External id": 988016,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522106.649, "dur": 0.253, + "args": { + "External id": 988017,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522110.355, "dur": 0.520, + "args": { + "External id": 988018,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522111.934, "dur": 0.286, + "args": { + "External id": 988019,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522113.157, "dur": 0.355, + "args": { + "External id": 988020,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522114.434, "dur": 0.325, + "args": { + "External id": 988021,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522115.706, "dur": 0.360, + "args": { + "External id": 988022,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522117.028, "dur": 0.375, + "args": { + "External id": 988023,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522119.040, "dur": 0.274, + "args": { + "External id": 988024,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522120.334, "dur": 0.382, + "args": { + "External id": 988025,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522123.863, "dur": 0.341, + "args": { + "External id": 988026,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522125.316, "dur": 0.255, + "args": { + "External id": 988027,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522126.540, "dur": 0.253, + "args": { + "External id": 988028,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522127.779, "dur": 0.249, + "args": { + "External id": 988029,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522129.094, "dur": 0.267, + "args": { + "External id": 988030,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522130.313, "dur": 0.250, + "args": { + "External id": 988031,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522131.505, "dur": 0.252, + "args": { + "External id": 988032,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522132.683, "dur": 0.250, + "args": { + "External id": 988033,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522135.769, "dur": 0.251, + "args": { + "External id": 988034,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522137.528, "dur": 0.252, + "args": { + "External id": 988035,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522138.728, "dur": 0.251, + "args": { + "External id": 988036,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522140.113, "dur": 0.253, + "args": { + "External id": 988037,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522141.308, "dur": 0.251, + "args": { + "External id": 988038,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522142.641, "dur": 0.254, + "args": { + "External id": 988039,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522145.282, "dur": 0.271, + "args": { + "External id": 988040,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522146.578, "dur": 0.248, + "args": { + "External id": 988041,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522149.861, "dur": 0.282, + "args": { + "External id": 988042,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522151.087, "dur": 0.251, + "args": { + "External id": 988043,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522152.308, "dur": 0.253, + "args": { + "External id": 988044,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522153.554, "dur": 0.249, + "args": { + "External id": 988045,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522154.783, "dur": 0.265, + "args": { + "External id": 988046,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522156.151, "dur": 0.292, + "args": { + "External id": 988047,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522157.811, "dur": 0.255, + "args": { + "External id": 988048,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522159.003, "dur": 0.255, + "args": { + "External id": 988049,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522162.024, "dur": 0.250, + "args": { + "External id": 988050,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522163.296, "dur": 0.251, + "args": { + "External id": 988051,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522164.493, "dur": 0.248, + "args": { + "External id": 988052,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522165.710, "dur": 0.249, + "args": { + "External id": 988053,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522166.950, "dur": 0.265, + "args": { + "External id": 988054,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522168.166, "dur": 0.254, + "args": { + "External id": 988055,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522169.553, "dur": 0.250, + "args": { + "External id": 988056,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522170.818, "dur": 0.248, + "args": { + "External id": 988057,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522173.643, "dur": 0.254, + "args": { + "External id": 988058,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522175.104, "dur": 0.258, + "args": { + "External id": 988059,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522176.311, "dur": 0.248, + "args": { + "External id": 988060,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522177.508, "dur": 0.247, + "args": { + "External id": 988061,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522178.740, "dur": 0.250, + "args": { + "External id": 988062,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522179.934, "dur": 0.247, + "args": { + "External id": 988063,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522181.224, "dur": 0.252, + "args": { + "External id": 988064,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522182.466, "dur": 0.291, + "args": { + "External id": 988065,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522185.556, "dur": 0.396, + "args": { + "External id": 988066,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522187.046, "dur": 0.488, + "args": { + "External id": 988067,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522188.639, "dur": 0.361, + "args": { + "External id": 988068,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522189.920, "dur": 0.247, + "args": { + "External id": 988069,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522191.099, "dur": 0.391, + "args": { + "External id": 988070,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522192.459, "dur": 0.500, + "args": { + "External id": 988071,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522193.882, "dur": 0.351, + "args": { + "External id": 988072,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522195.336, "dur": 0.378, + "args": { + "External id": 988073,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522200.489, "dur": 0.396, + "args": { + "External id": 988074,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522201.938, "dur": 0.410, + "args": { + "External id": 988075,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522211.051, "dur": 0.336, + "args": { + "External id": 988076,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522212.591, "dur": 1.390, + "args": { + "External id": 988077,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522215.289, "dur": 0.278, + "args": { + "External id": 988078,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522216.468, "dur": 0.399, + "args": { + "External id": 988079,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522217.726, "dur": 0.363, + "args": { + "External id": 988080,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522219.097, "dur": 0.364, + "args": { + "External id": 988081,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522222.215, "dur": 0.366, + "args": { + "External id": 988082,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522223.577, "dur": 0.243, + "args": { + "External id": 988083,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522224.752, "dur": 0.374, + "args": { + "External id": 988084,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522226.072, "dur": 0.359, + "args": { + "External id": 988085,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522227.424, "dur": 0.538, + "args": { + "External id": 988086,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522228.937, "dur": 0.372, + "args": { + "External id": 988087,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522230.246, "dur": 0.242, + "args": { + "External id": 988088,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522231.491, "dur": 0.258, + "args": { + "External id": 988089,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522235.062, "dur": 0.240, + "args": { + "External id": 988090,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522236.219, "dur": 0.435, + "args": { + "External id": 988091,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522237.639, "dur": 0.404, + "args": { + "External id": 988092,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522238.935, "dur": 0.353, + "args": { + "External id": 988093,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522240.197, "dur": 0.352, + "args": { + "External id": 988094,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522241.462, "dur": 0.348, + "args": { + "External id": 988095,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522242.816, "dur": 0.412, + "args": { + "External id": 988096,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522244.199, "dur": 0.353, + "args": { + "External id": 988097,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522247.169, "dur": 0.391, + "args": { + "External id": 988098,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522248.479, "dur": 0.241, + "args": { + "External id": 988099,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522249.621, "dur": 0.248, + "args": { + "External id": 988100,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522250.774, "dur": 0.243, + "args": { + "External id": 988101,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522252.012, "dur": 0.245, + "args": { + "External id": 988102,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522253.213, "dur": 0.274, + "args": { + "External id": 988103,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522254.543, "dur": 0.253, + "args": { + "External id": 988104,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522255.749, "dur": 0.263, + "args": { + "External id": 988105,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522258.547, "dur": 0.258, + "args": { + "External id": 988106,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522259.769, "dur": 0.246, + "args": { + "External id": 988107,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522261.521, "dur": 0.266, + "args": { + "External id": 988108,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522262.745, "dur": 0.255, + "args": { + "External id": 988109,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522263.906, "dur": 0.258, + "args": { + "External id": 988110,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522265.081, "dur": 0.280, + "args": { + "External id": 988111,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522266.327, "dur": 0.253, + "args": { + "External id": 988112,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522267.592, "dur": 0.255, + "args": { + "External id": 988113,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522270.666, "dur": 0.264, + "args": { + "External id": 988114,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522271.978, "dur": 0.265, + "args": { + "External id": 988115,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522273.303, "dur": 0.274, + "args": { + "External id": 988116,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522274.615, "dur": 0.253, + "args": { + "External id": 988117,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522275.874, "dur": 0.253, + "args": { + "External id": 988118,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522277.197, "dur": 0.255, + "args": { + "External id": 988119,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522278.559, "dur": 0.261, + "args": { + "External id": 988120,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522280.179, "dur": 0.262, + "args": { + "External id": 988121,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522283.510, "dur": 0.267, + "args": { + "External id": 988122,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522284.893, "dur": 0.254, + "args": { + "External id": 988123,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522286.073, "dur": 0.255, + "args": { + "External id": 988124,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522287.343, "dur": 0.252, + "args": { + "External id": 988125,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522288.543, "dur": 0.255, + "args": { + "External id": 988126,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522289.720, "dur": 0.254, + "args": { + "External id": 988127,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522290.931, "dur": 0.254, + "args": { + "External id": 988128,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522292.128, "dur": 0.252, + "args": { + "External id": 988129,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522295.360, "dur": 0.261, + "args": { + "External id": 988130,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522296.580, "dur": 0.254, + "args": { + "External id": 988131,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522297.805, "dur": 0.258, + "args": { + "External id": 988132,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522298.982, "dur": 0.254, + "args": { + "External id": 988133,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522300.172, "dur": 0.255, + "args": { + "External id": 988134,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522301.387, "dur": 0.376, + "args": { + "External id": 988135,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522302.679, "dur": 0.346, + "args": { + "External id": 988136,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522303.921, "dur": 0.394, + "args": { + "External id": 988137,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522307.153, "dur": 0.367, + "args": { + "External id": 988138,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522308.561, "dur": 0.254, + "args": { + "External id": 988139,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522309.772, "dur": 0.372, + "args": { + "External id": 988140,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522311.167, "dur": 0.372, + "args": { + "External id": 988141,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522312.493, "dur": 0.374, + "args": { + "External id": 988142,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522313.896, "dur": 0.257, + "args": { + "External id": 988143,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522315.566, "dur": 0.254, + "args": { + "External id": 988144,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522316.785, "dur": 0.255, + "args": { + "External id": 988145,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522320.122, "dur": 0.257, + "args": { + "External id": 988146,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942522321.470, "dur": 0.260, + "args": { + "External id": 988147,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338706, "tid": 2338706, + "ts": 6345942522390.426, "dur": 1644.426, + "args": { + "External id": 988148,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338706, "tid": 2338706, + "ts": 6345942522788.815, "dur": 1133.576, + "args": { + "External id": 988149,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522797.363, "dur": 10.243, + "args": { + "External id": 988150,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522803.697, "dur": 3.407, + "args": { + "External id": 988151,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522808.443, "dur": 2.144, + "args": { + "External id": 988152,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522809.655, "dur": 0.826, + "args": { + "External id": 988153,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522810.995, "dur": 5.603, + "args": { + "External id": 988154,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522813.588, "dur": 2.854, + "args": { + "External id": 988155,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522817.043, "dur": 1.400, + "args": { + "External id": 988156,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522817.862, "dur": 0.420, + "args": { + "External id": 988157,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522818.768, "dur": 2.748, + "args": { + "External id": 988158,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522820.634, "dur": 0.791, + "args": { + "External id": 988159,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522821.806, "dur": 24.342, + "args": { + "External id": 988160,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522845.471, "dur": 0.584, + "args": { + "External id": 994305,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522849.135, "dur": 1.328, + "args": { + "External id": 994306,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522849.685, "dur": 0.687, + "args": { + "External id": 994307,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522850.790, "dur": 2.438, + "args": { + "External id": 994308,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522852.573, "dur": 0.566, + "args": { + "External id": 994309,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522853.495, "dur": 2.713, + "args": { + "External id": 994310,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522855.656, "dur": 0.440, + "args": { + "External id": 994311,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522856.477, "dur": 1.207, + "args": { + "External id": 994312,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522856.908, "dur": 0.687, + "args": { + "External id": 994313,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522857.949, "dur": 3.446, + "args": { + "External id": 994314,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522859.683, "dur": 1.600, + "args": { + "External id": 994315,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522861.685, "dur": 1.171, + "args": { + "External id": 994316,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522862.150, "dur": 0.629, + "args": { + "External id": 994317,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522863.119, "dur": 2.715, + "args": { + "External id": 994318,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522865.154, "dur": 0.589, + "args": { + "External id": 994319,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522866.101, "dur": 3.074, + "args": { + "External id": 994320,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522868.397, "dur": 0.702, + "args": { + "External id": 994321,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522871.162, "dur": 1.284, + "args": { + "External id": 994322,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522871.619, "dur": 0.723, + "args": { + "External id": 994323,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522872.711, "dur": 2.698, + "args": { + "External id": 994324,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522874.642, "dur": 0.681, + "args": { + "External id": 994325,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522875.764, "dur": 3.096, + "args": { + "External id": 994326,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522878.169, "dur": 0.618, + "args": { + "External id": 994327,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522879.137, "dur": 1.158, + "args": { + "External id": 994328,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522879.589, "dur": 0.620, + "args": { + "External id": 994329,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522880.561, "dur": 3.844, + "args": { + "External id": 994330,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522882.206, "dur": 2.113, + "args": { + "External id": 994331,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522884.695, "dur": 1.077, + "args": { + "External id": 994332,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522885.114, "dur": 0.585, + "args": { + "External id": 994333,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522886.039, "dur": 3.170, + "args": { + "External id": 994334,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522888.395, "dur": 0.728, + "args": { + "External id": 994335,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522889.662, "dur": 2.682, + "args": { + "External id": 994336,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522891.560, "dur": 0.668, + "args": { + "External id": 994337,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522893.875, "dur": 1.228, + "args": { + "External id": 994338,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522894.360, "dur": 0.632, + "args": { + "External id": 994339,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522895.370, "dur": 2.505, + "args": { + "External id": 994340,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522897.095, "dur": 0.667, + "args": { + "External id": 994341,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522898.139, "dur": 2.796, + "args": { + "External id": 994342,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522900.250, "dur": 0.600, + "args": { + "External id": 994343,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522901.217, "dur": 1.209, + "args": { + "External id": 994344,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522901.623, "dur": 0.725, + "args": { + "External id": 994345,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522902.697, "dur": 4.221, + "args": { + "External id": 994346,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522904.862, "dur": 1.937, + "args": { + "External id": 994347,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522907.207, "dur": 1.239, + "args": { + "External id": 994348,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522907.631, "dur": 0.737, + "args": { + "External id": 994349,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522908.729, "dur": 3.393, + "args": { + "External id": 994350,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522911.269, "dur": 0.770, + "args": { + "External id": 994351,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522912.388, "dur": 3.413, + "args": { + "External id": 994352,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522914.992, "dur": 0.730, + "args": { + "External id": 994353,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522917.355, "dur": 1.091, + "args": { + "External id": 994354,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522917.769, "dur": 0.600, + "args": { + "External id": 994355,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522918.885, "dur": 2.469, + "args": { + "External id": 994356,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522920.555, "dur": 0.717, + "args": { + "External id": 994357,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522921.636, "dur": 3.140, + "args": { + "External id": 994358,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522924.038, "dur": 0.657, + "args": { + "External id": 994359,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522925.062, "dur": 1.073, + "args": { + "External id": 994360,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522925.475, "dur": 0.583, + "args": { + "External id": 994361,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522926.465, "dur": 3.837, + "args": { + "External id": 994362,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522928.267, "dur": 1.951, + "args": { + "External id": 994363,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522930.583, "dur": 1.259, + "args": { + "External id": 994364,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522931.031, "dur": 0.738, + "args": { + "External id": 994365,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522932.133, "dur": 2.781, + "args": { + "External id": 994366,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522934.119, "dur": 0.711, + "args": { + "External id": 994367,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522935.177, "dur": 2.768, + "args": { + "External id": 994368,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522937.087, "dur": 0.787, + "args": { + "External id": 994369,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522940.105, "dur": 1.310, + "args": { + "External id": 994370,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522940.538, "dur": 0.797, + "args": { + "External id": 994371,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522941.682, "dur": 2.818, + "args": { + "External id": 994372,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522943.382, "dur": 1.034, + "args": { + "External id": 994373,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522944.801, "dur": 2.895, + "args": { + "External id": 994374,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522946.953, "dur": 0.669, + "args": { + "External id": 994375,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522948.012, "dur": 1.384, + "args": { + "External id": 994376,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522948.429, "dur": 0.883, + "args": { + "External id": 994377,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522949.668, "dur": 4.095, + "args": { + "External id": 994378,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522951.670, "dur": 2.002, + "args": { + "External id": 994379,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522954.031, "dur": 1.348, + "args": { + "External id": 994380,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522954.445, "dur": 0.856, + "args": { + "External id": 994381,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522955.643, "dur": 2.446, + "args": { + "External id": 994382,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522957.237, "dur": 0.766, + "args": { + "External id": 994383,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522958.354, "dur": 4.355, + "args": { + "External id": 994384,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522961.965, "dur": 0.667, + "args": { + "External id": 994385,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522964.687, "dur": 1.246, + "args": { + "External id": 994386,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522965.189, "dur": 0.661, + "args": { + "External id": 994387,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522966.209, "dur": 2.721, + "args": { + "External id": 994388,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522967.869, "dur": 0.947, + "args": { + "External id": 994389,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522969.386, "dur": 2.798, + "args": { + "External id": 994390,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522971.180, "dur": 0.930, + "args": { + "External id": 994391,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522972.448, "dur": 1.308, + "args": { + "External id": 994392,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522972.863, "dur": 0.813, + "args": { + "External id": 994393,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522974.022, "dur": 3.307, + "args": { + "External id": 994394,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522975.610, "dur": 1.632, + "args": { + "External id": 994395,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522977.615, "dur": 1.319, + "args": { + "External id": 994396,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522978.023, "dur": 0.830, + "args": { + "External id": 994397,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522979.441, "dur": 3.350, + "args": { + "External id": 994398,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522981.462, "dur": 1.218, + "args": { + "External id": 994399,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522983.088, "dur": 2.261, + "args": { + "External id": 994400,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522984.711, "dur": 0.531, + "args": { + "External id": 994401,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522986.826, "dur": 1.500, + "args": { + "External id": 994402,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522987.479, "dur": 0.767, + "args": { + "External id": 994403,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522988.589, "dur": 2.720, + "args": { + "External id": 994404,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522990.485, "dur": 0.741, + "args": { + "External id": 994405,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522991.749, "dur": 3.850, + "args": { + "External id": 994406,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522994.530, "dur": 0.992, + "args": { + "External id": 994407,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522995.862, "dur": 1.154, + "args": { + "External id": 994408,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522996.279, "dur": 0.656, + "args": { + "External id": 994409,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942522997.282, "dur": 4.272, + "args": { + "External id": 994410,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942522998.946, "dur": 2.292, + "args": { + "External id": 994411,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523001.823, "dur": 1.498, + "args": { + "External id": 994412,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523002.238, "dur": 1.001, + "args": { + "External id": 994413,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523003.622, "dur": 2.771, + "args": { + "External id": 994414,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523005.524, "dur": 0.779, + "args": { + "External id": 994415,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523006.655, "dur": 15.806, + "args": { + "External id": 994416,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523020.209, "dur": 1.069, + "args": { + "External id": 994417,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523026.372, "dur": 1.489, + "args": { + "External id": 994418,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523027.201, "dur": 0.581, + "args": { + "External id": 994419,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523028.190, "dur": 2.687, + "args": { + "External id": 994420,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523029.957, "dur": 0.832, + "args": { + "External id": 994421,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523031.169, "dur": 2.425, + "args": { + "External id": 994422,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523032.847, "dur": 0.672, + "args": { + "External id": 994423,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523033.899, "dur": 1.691, + "args": { + "External id": 994424,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523034.844, "dur": 0.632, + "args": { + "External id": 994425,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523035.879, "dur": 3.649, + "args": { + "External id": 994426,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523037.942, "dur": 1.466, + "args": { + "External id": 994427,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523039.820, "dur": 1.176, + "args": { + "External id": 994428,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523040.302, "dur": 0.583, + "args": { + "External id": 994429,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523041.309, "dur": 2.125, + "args": { + "External id": 994430,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523042.718, "dur": 0.600, + "args": { + "External id": 994431,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523043.807, "dur": 2.594, + "args": { + "External id": 994432,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523045.688, "dur": 0.635, + "args": { + "External id": 994433,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523048.693, "dur": 1.070, + "args": { + "External id": 994434,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523049.116, "dur": 0.566, + "args": { + "External id": 994435,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523050.060, "dur": 33.101, + "args": { + "External id": 994436,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523051.832, "dur": 0.653, + "args": { + "External id": 994437,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523086.074, "dur": 3.596, + "args": { + "External id": 994438,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523088.859, "dur": 0.606, + "args": { + "External id": 994439,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523090.021, "dur": 1.281, + "args": { + "External id": 994440,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523090.521, "dur": 0.661, + "args": { + "External id": 994441,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523091.641, "dur": 4.039, + "args": { + "External id": 994442,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523093.783, "dur": 1.737, + "args": { + "External id": 994443,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523095.994, "dur": 1.282, + "args": { + "External id": 994444,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523096.436, "dur": 0.718, + "args": { + "External id": 994445,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523097.840, "dur": 2.967, + "args": { + "External id": 994446,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523099.994, "dur": 0.691, + "args": { + "External id": 994447,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523101.211, "dur": 2.970, + "args": { + "External id": 994448,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523103.222, "dur": 0.880, + "args": { + "External id": 994449,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523106.926, "dur": 1.568, + "args": { + "External id": 994450,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523107.460, "dur": 0.903, + "args": { + "External id": 994451,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523108.792, "dur": 2.571, + "args": { + "External id": 994452,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523110.543, "dur": 0.709, + "args": { + "External id": 994453,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523111.673, "dur": 2.897, + "args": { + "External id": 994454,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523113.735, "dur": 0.724, + "args": { + "External id": 994455,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523115.033, "dur": 1.150, + "args": { + "External id": 994456,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523115.468, "dur": 0.606, + "args": { + "External id": 994457,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523116.478, "dur": 4.335, + "args": { + "External id": 994458,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523118.397, "dur": 2.103, + "args": { + "External id": 994459,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523121.136, "dur": 1.158, + "args": { + "External id": 994460,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523121.611, "dur": 0.605, + "args": { + "External id": 994461,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523122.599, "dur": 2.263, + "args": { + "External id": 994462,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523124.111, "dur": 0.658, + "args": { + "External id": 994463,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523125.171, "dur": 2.130, + "args": { + "External id": 994464,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523126.690, "dur": 0.529, + "args": { + "External id": 994465,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523129.723, "dur": 1.500, + "args": { + "External id": 994466,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523130.203, "dur": 0.909, + "args": { + "External id": 994467,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523131.588, "dur": 2.259, + "args": { + "External id": 994468,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523133.289, "dur": 0.443, + "args": { + "External id": 994469,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523134.354, "dur": 2.723, + "args": { + "External id": 994470,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523136.254, "dur": 0.715, + "args": { + "External id": 994471,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523137.364, "dur": 1.159, + "args": { + "External id": 994472,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523137.794, "dur": 0.619, + "args": { + "External id": 994473,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523138.809, "dur": 3.454, + "args": { + "External id": 994474,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523140.307, "dur": 1.492, + "args": { + "External id": 994475,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523142.634, "dur": 1.182, + "args": { + "External id": 994476,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523143.120, "dur": 0.616, + "args": { + "External id": 994477,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523144.154, "dur": 1.909, + "args": { + "External id": 994478,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523144.938, "dur": 1.011, + "args": { + "External id": 994479,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523146.498, "dur": 4.106, + "args": { + "External id": 994480,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523149.535, "dur": 0.961, + "args": { + "External id": 994481,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523153.075, "dur": 2.304, + "args": { + "External id": 994482,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523154.686, "dur": 0.618, + "args": { + "External id": 994483,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523155.732, "dur": 1.571, + "args": { + "External id": 994484,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523156.563, "dur": 0.652, + "args": { + "External id": 994485,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523157.855, "dur": 3.808, + "args": { + "External id": 994486,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523160.512, "dur": 1.044, + "args": { + "External id": 994487,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523161.983, "dur": 1.917, + "args": { + "External id": 994488,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523162.983, "dur": 0.817, + "args": { + "External id": 994489,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523164.394, "dur": 3.134, + "args": { + "External id": 994490,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523165.811, "dur": 1.587, + "args": { + "External id": 994491,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523167.853, "dur": 1.675, + "args": { + "External id": 994492,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523168.863, "dur": 0.585, + "args": { + "External id": 994493,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523169.912, "dur": 2.739, + "args": { + "External id": 994494,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523171.692, "dur": 0.851, + "args": { + "External id": 994495,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523172.969, "dur": 3.620, + "args": { + "External id": 994496,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523175.843, "dur": 0.631, + "args": { + "External id": 994497,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523179.103, "dur": 2.035, + "args": { + "External id": 994498,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523180.476, "dur": 0.586, + "args": { + "External id": 994499,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523181.487, "dur": 1.955, + "args": { + "External id": 994500,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523182.490, "dur": 0.860, + "args": { + "External id": 994501,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523183.755, "dur": 3.606, + "args": { + "External id": 994502,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523186.623, "dur": 0.633, + "args": { + "External id": 994503,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523187.673, "dur": 1.943, + "args": { + "External id": 994504,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523188.652, "dur": 0.881, + "args": { + "External id": 994505,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523190.111, "dur": 3.314, + "args": { + "External id": 994506,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523191.392, "dur": 1.816, + "args": { + "External id": 994507,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523193.770, "dur": 1.669, + "args": { + "External id": 994508,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523194.687, "dur": 0.672, + "args": { + "External id": 994509,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523195.756, "dur": 2.503, + "args": { + "External id": 994510,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523197.157, "dur": 0.988, + "args": { + "External id": 994511,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523198.623, "dur": 3.001, + "args": { + "External id": 994512,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523200.925, "dur": 0.622, + "args": { + "External id": 994513,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523203.447, "dur": 1.939, + "args": { + "External id": 994514,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523204.742, "dur": 0.561, + "args": { + "External id": 994515,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523205.761, "dur": 1.635, + "args": { + "External id": 994516,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523206.543, "dur": 0.764, + "args": { + "External id": 994517,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523207.740, "dur": 4.284, + "args": { + "External id": 994518,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523211.175, "dur": 0.738, + "args": { + "External id": 994519,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523212.345, "dur": 1.813, + "args": { + "External id": 994520,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523213.547, "dur": 0.530, + "args": { + "External id": 994521,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523214.471, "dur": 2.704, + "args": { + "External id": 994522,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523215.600, "dur": 1.425, + "args": { + "External id": 994523,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523217.526, "dur": 1.628, + "args": { + "External id": 994524,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523218.425, "dur": 0.649, + "args": { + "External id": 994525,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523219.686, "dur": 2.057, + "args": { + "External id": 994526,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523220.927, "dur": 0.703, + "args": { + "External id": 994527,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523222.252, "dur": 3.190, + "args": { + "External id": 994528,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523224.692, "dur": 0.638, + "args": { + "External id": 994529,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523227.955, "dur": 2.581, + "args": { + "External id": 994530,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523229.423, "dur": 1.034, + "args": { + "External id": 994531,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523230.918, "dur": 1.504, + "args": { + "External id": 994532,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523231.587, "dur": 0.745, + "args": { + "External id": 994533,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523232.756, "dur": 3.707, + "args": { + "External id": 994534,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523235.783, "dur": 0.571, + "args": { + "External id": 994535,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523236.838, "dur": 1.558, + "args": { + "External id": 994536,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523237.720, "dur": 0.574, + "args": { + "External id": 994537,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523238.714, "dur": 3.589, + "args": { + "External id": 994538,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523240.316, "dur": 1.861, + "args": { + "External id": 994539,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523242.631, "dur": 1.549, + "args": { + "External id": 994540,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523243.482, "dur": 0.623, + "args": { + "External id": 994541,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523244.568, "dur": 2.579, + "args": { + "External id": 994542,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523246.195, "dur": 0.839, + "args": { + "External id": 994543,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523247.463, "dur": 3.514, + "args": { + "External id": 994544,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523249.713, "dur": 1.183, + "args": { + "External id": 994545,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523252.924, "dur": 2.100, + "args": { + "External id": 994546,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523254.279, "dur": 0.664, + "args": { + "External id": 994547,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523255.339, "dur": 1.806, + "args": { + "External id": 994548,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523256.392, "dur": 0.663, + "args": { + "External id": 994549,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523257.474, "dur": 3.734, + "args": { + "External id": 994550,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523260.243, "dur": 0.860, + "args": { + "External id": 994551,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523261.550, "dur": 1.859, + "args": { + "External id": 994552,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523262.736, "dur": 0.600, + "args": { + "External id": 994553,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523263.769, "dur": 3.399, + "args": { + "External id": 994554,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523265.308, "dur": 1.728, + "args": { + "External id": 994555,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523267.525, "dur": 1.733, + "args": { + "External id": 994556,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523268.451, "dur": 0.716, + "args": { + "External id": 994557,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523269.584, "dur": 2.382, + "args": { + "External id": 994558,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523271.063, "dur": 0.813, + "args": { + "External id": 994559,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523272.302, "dur": 3.821, + "args": { + "External id": 994560,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523275.483, "dur": 0.563, + "args": { + "External id": 994561,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523278.655, "dur": 2.098, + "args": { + "External id": 994562,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523279.830, "dur": 0.809, + "args": { + "External id": 994563,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523281.262, "dur": 2.283, + "args": { + "External id": 994564,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523282.451, "dur": 1.005, + "args": { + "External id": 994565,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523283.863, "dur": 2.979, + "args": { + "External id": 994566,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523286.177, "dur": 0.551, + "args": { + "External id": 994567,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523287.162, "dur": 1.655, + "args": { + "External id": 994568,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523287.917, "dur": 0.816, + "args": { + "External id": 994569,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523289.216, "dur": 2.854, + "args": { + "External id": 994570,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523290.362, "dur": 1.489, + "args": { + "External id": 994571,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523292.396, "dur": 1.763, + "args": { + "External id": 994572,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523293.316, "dur": 0.769, + "args": { + "External id": 994573,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523294.474, "dur": 2.489, + "args": { + "External id": 994574,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523296.130, "dur": 0.721, + "args": { + "External id": 994575,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523297.283, "dur": 3.186, + "args": { + "External id": 994576,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523299.763, "dur": 0.631, + "args": { + "External id": 994577,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523302.550, "dur": 1.899, + "args": { + "External id": 994578,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523303.729, "dur": 0.638, + "args": { + "External id": 994579,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523304.842, "dur": 1.824, + "args": { + "External id": 994580,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523305.767, "dur": 0.809, + "args": { + "External id": 994581,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523306.988, "dur": 3.853, + "args": { + "External id": 994582,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523310.017, "dur": 0.747, + "args": { + "External id": 994583,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523311.156, "dur": 1.961, + "args": { + "External id": 994584,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523312.379, "dur": 0.655, + "args": { + "External id": 994585,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523313.523, "dur": 3.356, + "args": { + "External id": 994586,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523314.519, "dur": 2.229, + "args": { + "External id": 994587,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523317.197, "dur": 2.029, + "args": { + "External id": 994588,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523318.263, "dur": 0.880, + "args": { + "External id": 994589,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523319.543, "dur": 1.995, + "args": { + "External id": 994590,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523320.659, "dur": 0.770, + "args": { + "External id": 994591,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523321.868, "dur": 3.129, + "args": { + "External id": 994592,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523324.255, "dur": 0.659, + "args": { + "External id": 994593,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523326.702, "dur": 2.332, + "args": { + "External id": 994594,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523328.143, "dur": 0.811, + "args": { + "External id": 994595,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523329.373, "dur": 2.150, + "args": { + "External id": 994596,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523330.688, "dur": 0.745, + "args": { + "External id": 994597,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523332.116, "dur": 3.448, + "args": { + "External id": 994598,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523334.603, "dur": 0.880, + "args": { + "External id": 994599,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523335.883, "dur": 2.046, + "args": { + "External id": 994600,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523336.698, "dur": 1.150, + "args": { + "External id": 994601,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523338.243, "dur": 2.743, + "args": { + "External id": 994602,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523339.655, "dur": 1.199, + "args": { + "External id": 994603,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523341.337, "dur": 1.599, + "args": { + "External id": 994604,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523342.244, "dur": 0.613, + "args": { + "External id": 994605,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523343.534, "dur": 2.194, + "args": { + "External id": 994606,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523344.729, "dur": 0.889, + "args": { + "External id": 994607,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523346.041, "dur": 3.091, + "args": { + "External id": 994608,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523348.336, "dur": 0.716, + "args": { + "External id": 994609,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523351.155, "dur": 1.918, + "args": { + "External id": 994610,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523352.462, "dur": 0.520, + "args": { + "External id": 994611,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523353.414, "dur": 1.703, + "args": { + "External id": 994612,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523354.283, "dur": 0.741, + "args": { + "External id": 994613,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523355.435, "dur": 3.202, + "args": { + "External id": 994614,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523357.736, "dur": 0.787, + "args": { + "External id": 994615,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523358.956, "dur": 2.083, + "args": { + "External id": 994616,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523360.409, "dur": 0.552, + "args": { + "External id": 994617,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523361.359, "dur": 2.823, + "args": { + "External id": 994618,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523362.682, "dur": 1.376, + "args": { + "External id": 994619,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523364.526, "dur": 1.617, + "args": { + "External id": 994620,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523365.440, "dur": 0.624, + "args": { + "External id": 994621,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523366.467, "dur": 2.085, + "args": { + "External id": 994622,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523367.755, "dur": 0.687, + "args": { + "External id": 994623,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523369.063, "dur": 3.339, + "args": { + "External id": 994624,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523371.503, "dur": 0.814, + "args": { + "External id": 994625,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523374.948, "dur": 2.554, + "args": { + "External id": 994626,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523376.530, "dur": 0.888, + "args": { + "External id": 994627,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523377.838, "dur": 1.759, + "args": { + "External id": 994628,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523378.762, "dur": 0.740, + "args": { + "External id": 994629,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523380.040, "dur": 3.950, + "args": { + "External id": 994630,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523383.328, "dur": 0.586, + "args": { + "External id": 994631,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523384.324, "dur": 1.982, + "args": { + "External id": 994632,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523385.299, "dur": 0.924, + "args": { + "External id": 994633,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523386.678, "dur": 2.876, + "args": { + "External id": 994634,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523388.070, "dur": 1.343, + "args": { + "External id": 994635,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523389.878, "dur": 1.565, + "args": { + "External id": 994636,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523390.751, "dur": 0.612, + "args": { + "External id": 994637,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523391.759, "dur": 2.307, + "args": { + "External id": 994638,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523393.283, "dur": 0.694, + "args": { + "External id": 994639,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523394.478, "dur": 3.379, + "args": { + "External id": 994640,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523397.126, "dur": 0.621, + "args": { + "External id": 994641,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523400.230, "dur": 2.247, + "args": { + "External id": 994642,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523401.617, "dur": 0.782, + "args": { + "External id": 994643,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523402.820, "dur": 1.992, + "args": { + "External id": 994644,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523403.977, "dur": 0.745, + "args": { + "External id": 994645,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523405.189, "dur": 3.909, + "args": { + "External id": 994646,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523408.208, "dur": 0.794, + "args": { + "External id": 994647,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523409.415, "dur": 2.037, + "args": { + "External id": 994648,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523410.417, "dur": 0.951, + "args": { + "External id": 994649,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523411.764, "dur": 3.053, + "args": { + "External id": 994650,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523413.076, "dur": 1.618, + "args": { + "External id": 994651,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523415.164, "dur": 1.520, + "args": { + "External id": 994652,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523415.949, "dur": 0.654, + "args": { + "External id": 994653,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523417.105, "dur": 2.164, + "args": { + "External id": 994654,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523418.474, "dur": 0.699, + "args": { + "External id": 994655,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523419.613, "dur": 3.227, + "args": { + "External id": 994656,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523422.073, "dur": 0.685, + "args": { + "External id": 994657,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523425.695, "dur": 2.464, + "args": { + "External id": 994658,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523426.758, "dur": 1.327, + "args": { + "External id": 994659,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523428.469, "dur": 2.110, + "args": { + "External id": 994660,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523429.444, "dur": 1.039, + "args": { + "External id": 994661,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523430.897, "dur": 3.350, + "args": { + "External id": 994662,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523433.045, "dur": 1.123, + "args": { + "External id": 994663,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523434.557, "dur": 1.715, + "args": { + "External id": 994664,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523435.320, "dur": 0.863, + "args": { + "External id": 994665,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523436.595, "dur": 3.392, + "args": { + "External id": 994666,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523437.924, "dur": 1.931, + "args": { + "External id": 994667,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523440.300, "dur": 1.989, + "args": { + "External id": 994668,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523441.313, "dur": 0.902, + "args": { + "External id": 994669,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523442.634, "dur": 1.964, + "args": { + "External id": 994670,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523443.871, "dur": 0.617, + "args": { + "External id": 994671,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523445.164, "dur": 3.004, + "args": { + "External id": 994672,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523447.529, "dur": 0.558, + "args": { + "External id": 994673,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523450.798, "dur": 2.593, + "args": { + "External id": 994674,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523452.150, "dur": 1.160, + "args": { + "External id": 994675,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523453.738, "dur": 1.746, + "args": { + "External id": 994676,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523454.595, "dur": 0.800, + "args": { + "External id": 994677,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523455.800, "dur": 3.590, + "args": { + "External id": 994678,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523458.458, "dur": 0.826, + "args": { + "External id": 994679,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523459.732, "dur": 3.245, + "args": { + "External id": 994680,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523462.384, "dur": 0.513, + "args": { + "External id": 994681,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523463.290, "dur": 3.209, + "args": { + "External id": 994682,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523464.526, "dur": 1.828, + "args": { + "External id": 994683,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523466.819, "dur": 2.358, + "args": { + "External id": 994684,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523468.210, "dur": 0.877, + "args": { + "External id": 994685,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523469.490, "dur": 1.897, + "args": { + "External id": 994686,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523470.674, "dur": 0.624, + "args": { + "External id": 994687,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523471.710, "dur": 3.099, + "args": { + "External id": 994688,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523473.870, "dur": 0.827, + "args": { + "External id": 994689,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523477.064, "dur": 1.936, + "args": { + "External id": 994690,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523478.338, "dur": 0.566, + "args": { + "External id": 994691,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523479.317, "dur": 1.907, + "args": { + "External id": 994692,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523480.175, "dur": 0.958, + "args": { + "External id": 994693,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523481.544, "dur": 3.926, + "args": { + "External id": 994694,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523484.454, "dur": 0.934, + "args": { + "External id": 994695,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523485.784, "dur": 1.873, + "args": { + "External id": 994696,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523486.608, "dur": 0.940, + "args": { + "External id": 994697,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523487.999, "dur": 3.568, + "args": { + "External id": 994698,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523489.953, "dur": 1.469, + "args": { + "External id": 994699,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523492.006, "dur": 1.695, + "args": { + "External id": 994700,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523492.998, "dur": 0.597, + "args": { + "External id": 994701,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523494.259, "dur": 2.866, + "args": { + "External id": 994702,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523496.157, "dur": 0.877, + "args": { + "External id": 994703,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523497.438, "dur": 3.029, + "args": { + "External id": 994704,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523499.764, "dur": 0.595, + "args": { + "External id": 994705,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523502.476, "dur": 2.288, + "args": { + "External id": 994706,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523503.898, "dur": 0.786, + "args": { + "External id": 994707,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523505.096, "dur": 2.232, + "args": { + "External id": 994708,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523506.343, "dur": 0.891, + "args": { + "External id": 994709,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523507.666, "dur": 3.228, + "args": { + "External id": 994710,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523509.848, "dur": 0.964, + "args": { + "External id": 994711,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523511.209, "dur": 1.540, + "args": { + "External id": 994712,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523511.990, "dur": 0.647, + "args": { + "External id": 994713,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523513.062, "dur": 3.042, + "args": { + "External id": 994714,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523514.497, "dur": 1.454, + "args": { + "External id": 994715,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523516.423, "dur": 1.862, + "args": { + "External id": 994716,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523517.464, "dur": 0.740, + "args": { + "External id": 994717,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523518.625, "dur": 2.359, + "args": { + "External id": 994718,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523520.037, "dur": 0.855, + "args": { + "External id": 994719,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523521.497, "dur": 3.319, + "args": { + "External id": 994720,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523523.913, "dur": 0.821, + "args": { + "External id": 994721,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523527.609, "dur": 1.625, + "args": { + "External id": 994722,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523528.558, "dur": 0.596, + "args": { + "External id": 994723,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523529.651, "dur": 1.826, + "args": { + "External id": 994724,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523530.577, "dur": 0.807, + "args": { + "External id": 994725,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523531.831, "dur": 3.703, + "args": { + "External id": 994726,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523534.724, "dur": 0.727, + "args": { + "External id": 994727,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523535.866, "dur": 1.793, + "args": { + "External id": 994728,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523536.944, "dur": 0.635, + "args": { + "External id": 994729,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523537.990, "dur": 3.281, + "args": { + "External id": 994730,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523539.371, "dur": 1.765, + "args": { + "External id": 994731,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523541.664, "dur": 1.605, + "args": { + "External id": 994732,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523542.608, "dur": 0.582, + "args": { + "External id": 994733,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523543.569, "dur": 2.768, + "args": { + "External id": 994734,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523545.235, "dur": 1.012, + "args": { + "External id": 994735,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523546.952, "dur": 3.152, + "args": { + "External id": 994736,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523549.358, "dur": 0.634, + "args": { + "External id": 994737,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523551.609, "dur": 1.942, + "args": { + "External id": 994738,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523552.804, "dur": 0.667, + "args": { + "External id": 994739,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523553.946, "dur": 2.213, + "args": { + "External id": 994740,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523555.324, "dur": 0.740, + "args": { + "External id": 994741,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523556.509, "dur": 3.466, + "args": { + "External id": 994742,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523559.270, "dur": 0.631, + "args": { + "External id": 994743,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523560.335, "dur": 1.588, + "args": { + "External id": 994744,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523561.222, "dur": 0.618, + "args": { + "External id": 994745,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523562.258, "dur": 3.534, + "args": { + "External id": 994746,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523563.760, "dur": 1.810, + "args": { + "External id": 994747,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523566.124, "dur": 1.874, + "args": { + "External id": 994748,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523567.116, "dur": 0.806, + "args": { + "External id": 994749,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523568.339, "dur": 2.756, + "args": { + "External id": 994750,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523570.175, "dur": 0.830, + "args": { + "External id": 994751,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523571.425, "dur": 3.044, + "args": { + "External id": 994752,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523573.690, "dur": 0.674, + "args": { + "External id": 994753,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523576.347, "dur": 2.145, + "args": { + "External id": 994754,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523577.836, "dur": 0.578, + "args": { + "External id": 994755,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523578.845, "dur": 1.604, + "args": { + "External id": 994756,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523579.618, "dur": 0.739, + "args": { + "External id": 994757,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523580.845, "dur": 3.114, + "args": { + "External id": 994758,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523583.184, "dur": 0.669, + "args": { + "External id": 994759,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523584.320, "dur": 1.994, + "args": { + "External id": 994760,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523585.324, "dur": 0.911, + "args": { + "External id": 994761,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523586.699, "dur": 3.045, + "args": { + "External id": 994762,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523588.062, "dur": 1.544, + "args": { + "External id": 994763,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523590.082, "dur": 1.779, + "args": { + "External id": 994764,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523591.138, "dur": 0.645, + "args": { + "External id": 994765,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523592.222, "dur": 2.142, + "args": { + "External id": 994766,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523593.364, "dur": 0.908, + "args": { + "External id": 994767,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523594.771, "dur": 3.335, + "args": { + "External id": 994768,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523597.406, "dur": 0.613, + "args": { + "External id": 994769,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523600.389, "dur": 2.096, + "args": { + "External id": 994770,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523601.731, "dur": 0.671, + "args": { + "External id": 994771,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523602.815, "dur": 2.001, + "args": { + "External id": 994772,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523603.964, "dur": 0.763, + "args": { + "External id": 994773,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523605.144, "dur": 3.344, + "args": { + "External id": 994774,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523607.532, "dur": 0.846, + "args": { + "External id": 994775,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523608.912, "dur": 1.361, + "args": { + "External id": 994776,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523609.745, "dur": 0.434, + "args": { + "External id": 994777,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523610.647, "dur": 9.296, + "args": { + "External id": 994778,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523615.199, "dur": 4.580, + "args": { + "External id": 994779,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523620.273, "dur": 2.071, + "args": { + "External id": 994780,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523621.454, "dur": 0.795, + "args": { + "External id": 994781,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523622.755, "dur": 2.296, + "args": { + "External id": 994782,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523623.914, "dur": 1.039, + "args": { + "External id": 994783,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523625.449, "dur": 2.850, + "args": { + "External id": 994784,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523627.621, "dur": 0.583, + "args": { + "External id": 994785,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523630.429, "dur": 1.800, + "args": { + "External id": 994786,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523631.580, "dur": 0.571, + "args": { + "External id": 994787,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523632.538, "dur": 1.691, + "args": { + "External id": 994788,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523633.474, "dur": 0.655, + "args": { + "External id": 994789,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523634.583, "dur": 3.726, + "args": { + "External id": 994790,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523637.518, "dur": 0.715, + "args": { + "External id": 994791,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523638.662, "dur": 2.629, + "args": { + "External id": 994792,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523639.868, "dur": 1.343, + "args": { + "External id": 994793,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523641.670, "dur": 3.385, + "args": { + "External id": 994794,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523642.754, "dur": 1.944, + "args": { + "External id": 994795,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523645.376, "dur": 2.017, + "args": { + "External id": 994796,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523646.539, "dur": 0.746, + "args": { + "External id": 994797,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523647.709, "dur": 2.682, + "args": { + "External id": 994798,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523649.396, "dur": 0.905, + "args": { + "External id": 994799,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523650.709, "dur": 3.117, + "args": { + "External id": 994800,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523652.800, "dur": 0.945, + "args": { + "External id": 994801,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523655.786, "dur": 2.432, + "args": { + "External id": 994802,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523657.158, "dur": 0.948, + "args": { + "External id": 994803,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523658.558, "dur": 1.940, + "args": { + "External id": 994804,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523659.778, "dur": 0.609, + "args": { + "External id": 994805,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523660.869, "dur": 3.226, + "args": { + "External id": 994806,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523663.029, "dur": 0.990, + "args": { + "External id": 994807,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523664.656, "dur": 1.875, + "args": { + "External id": 994808,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523665.514, "dur": 0.941, + "args": { + "External id": 994809,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523666.912, "dur": 2.691, + "args": { + "External id": 994810,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523667.924, "dur": 1.354, + "args": { + "External id": 994811,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523669.921, "dur": 1.823, + "args": { + "External id": 994812,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523671.004, "dur": 0.655, + "args": { + "External id": 994813,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523672.090, "dur": 2.444, + "args": { + "External id": 994814,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523673.231, "dur": 1.221, + "args": { + "External id": 994815,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523674.914, "dur": 25.627, + "args": { + "External id": 994816,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523699.325, "dur": 1.122, + "args": { + "External id": 994817,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523702.733, "dur": 1.729, + "args": { + "External id": 994818,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523703.703, "dur": 0.678, + "args": { + "External id": 994819,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523704.820, "dur": 1.584, + "args": { + "External id": 994820,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523705.738, "dur": 0.576, + "args": { + "External id": 994821,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523706.720, "dur": 3.379, + "args": { + "External id": 994822,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523709.359, "dur": 0.647, + "args": { + "External id": 994823,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523710.413, "dur": 1.478, + "args": { + "External id": 994824,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523711.331, "dur": 0.466, + "args": { + "External id": 994825,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523712.424, "dur": 2.859, + "args": { + "External id": 994826,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523713.433, "dur": 1.671, + "args": { + "External id": 994827,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523715.660, "dur": 1.944, + "args": { + "External id": 994828,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523716.872, "dur": 0.662, + "args": { + "External id": 994829,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523717.998, "dur": 2.202, + "args": { + "External id": 994830,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523719.171, "dur": 0.943, + "args": { + "External id": 994831,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523720.567, "dur": 2.860, + "args": { + "External id": 994832,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523722.934, "dur": 0.404, + "args": { + "External id": 994833,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523725.910, "dur": 2.054, + "args": { + "External id": 994834,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523727.010, "dur": 0.874, + "args": { + "External id": 994835,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523728.295, "dur": 1.959, + "args": { + "External id": 994836,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523729.324, "dur": 0.845, + "args": { + "External id": 994837,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338706, "tid": 2338706, + "ts": 6345942523730.582, "dur": 3.433, + "args": { + "External id": 994838,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942523733.399, "dur": 0.538, + "args": { + "External id": 994839,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2338706, "tid": 2338706, + "ts": 6345942523757.569, "dur": 151.291, + "args": { + "External id": 994840,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2338706, "tid": 2338706, + "ts": 6345942524087.654, "dur": 170.585, + "args": { + "External id": 994841,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[273], [], [], [], []], "Ev Idx": 21597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2338706, "tid": 2338706, + "ts": 6345942524172.412, "dur": 59.718, + "args": { + "External id": 994842,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[273], [], [], [], []], "Ev Idx": 21598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338706, "tid": 2338706, + "ts": 6345942524189.899, "dur": 2.721, + "args": { + "External id": 994843,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Redistribute", "pid": 2338706, "tid": 2338706, + "ts": 6345942524707.724, "dur": 1148.868, + "args": { + "External id": 994844,"Sequence number": 10552706, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "False"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2338706, "tid": 2338706, + "ts": 6345942524775.901, "dur": 65.962, + "args": { + "External id": 994845,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942524781.681, "dur": 1.720, + "args": { + "External id": 994846,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942524785.588, "dur": 0.832, + "args": { + "External id": 994847,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 21603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::all_reduce", "pid": 2338706, "tid": 2338706, + "ts": 6345942524869.424, "dur": 598.776, + "args": { + "External id": 994848,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["float", "", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338706, "tid": 2338706, + "ts": 6345942524873.822, "dur": 53.151, + "args": { + "External id": 994849,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338706, "tid": 2338706, + "ts": 6345942524877.944, "dur": 10.524, + "args": { + "External id": 994850,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "0"], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 21606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345942524882.317, "dur": 5.199, + "args": { + "External id": 994851,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 21607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338706, "tid": 2338706, + "ts": 6345942524890.418, "dur": 35.875, + "args": { + "External id": 994852,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::allreduce_", "pid": 2338706, "tid": 2338706, + "ts": 6345942524935.857, "dur": 528.089, + "args": { + "External id": 994853,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "-1"], "Input type": ["TensorList", "", "", "", "Scalar"], "Input Strides": [[[]], [], [], [], []], "Input Dims": [[[]], [], [], [], []], "Ev Idx": 21609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345942524978.238, "dur": 477.520, + "args": { + "External id": 994854,"Record function id": 0, "Collective name": "allreduce", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[[]], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1, "Process Group Name": "0", "Input type": ["TensorList", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[[]], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 21610, "In msg nelems": 1 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:all_reduce", "pid": 2338706, "tid": 2338706, + "ts": 6345942524993.223, "dur": 453.229, + "args": { + "External id": 994855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2338706, "tid": 2338706, + "ts": 6345942525551.936, "dur": 260.645, + "args": { + "External id": 994856,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::wait_tensor", "pid": 2338706, "tid": 2338706, + "ts": 6345942525677.749, "dur": 42.479, + "args": { + "External id": 994857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338706, "tid": 2338706, + "ts": 6345942525704.484, "dur": 5.260, + "args": { + "External id": 994858,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 21614, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2338706, "tid": 2338706, + "ts": 6345942525753.617, "dur": 51.843, + "args": { + "External id": 994859,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942525756.667, "dur": 1.327, + "args": { + "External id": 994860,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942525759.737, "dur": 0.794, + "args": { + "External id": 994861,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 21617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_ToTorchTensor", "pid": 2338706, "tid": 2338706, + "ts": 6345942525875.967, "dur": 163.248, + "args": { + "External id": 994862,"Sequence number": 10552707, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338706, "tid": 2338706, + "ts": 6345942525994.925, "dur": 12.061, + "args": { + "External id": 994863,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338706, "tid": 2338706, + "ts": 6345942525999.645, "dur": 7.140, + "args": { + "External id": 994864,"Record function id": 0, "Concrete Inputs": ["", "[]"], "Input type": ["float", "ScalarList"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338706, "tid": 2338706, + "ts": 6345942526475.078, "dur": 48.562, + "args": { + "External id": 994865,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "double", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reciprocal", "pid": 2338706, "tid": 2338706, + "ts": 6345942526537.204, "dur": 24.035, + "args": { + "External id": 994866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 2338706, "tid": 2338706, + "ts": 6345942526569.499, "dur": 25.798, + "args": { + "External id": 994867,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "double"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clamp", "pid": 2338706, "tid": 2338706, + "ts": 6345942526611.394, "dur": 28.331, + "args": { + "External id": 994868,"Record function id": 0, "Concrete Inputs": ["", "", "1."], "Input type": ["float", "", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942526615.128, "dur": 0.732, + "args": { + "External id": 994869,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 21625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338706, "tid": 2338706, + "ts": 6345942526661.409, "dur": 0.607, + "args": { + "External id": 994870,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2338706, "tid": 2338706, + "ts": 6345942526813.613, "dur": 1283.340, + "args": { + "External id": 994871,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2338706, "tid": 2338706, + "ts": 6345942527338.674, "dur": 660.861, + "args": { + "External id": 994872,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isnan", "pid": 2338706, "tid": 2338706, + "ts": 6345942528163.435, "dur": 42.960, + "args": { + "External id": 994873,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338706, "tid": 2338706, + "ts": 6345942528168.156, "dur": 37.509, + "args": { + "External id": 994874,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338706, "tid": 2338706, + "ts": 6345942528211.760, "dur": 7582.360, + "args": { + "External id": 994875,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2338706, + "ts": 6345942528213.659, "dur": 7579.836, + "args": { + "External id": 994876,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2338706, + "ts": 6345942528216.331, "dur": 7574.626, + "args": { + "External id": 994877,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isinf", "pid": 2338706, "tid": 2338706, + "ts": 6345942535809.879, "dur": 103.970, + "args": { + "External id": 994878,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345942535814.521, "dur": 66.063, + "args": { + "External id": 994879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338706, "tid": 2338706, + "ts": 6345942535828.096, "dur": 6.151, + "args": { + "External id": 994880,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 21636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338706, "tid": 2338706, + "ts": 6345942535837.806, "dur": 42.294, + "args": { + "External id": 994881,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], [1]], "Input Dims": [[], [0]], "Ev Idx": 21637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338706, "tid": 2338706, + "ts": 6345942535847.829, "dur": 5.126, + "args": { + "External id": 994882,"Record function id": 0, "Concrete Inputs": ["", "[]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 21638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338706, "tid": 2338706, + "ts": 6345942535882.830, "dur": 29.971, + "args": { + "External id": 994883,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338706, "tid": 2338706, + "ts": 6345942535919.018, "dur": 51.408, + "args": { + "External id": 994884,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338706, "tid": 2338706, + "ts": 6345942535920.225, "dur": 49.995, + "args": { + "External id": 994885,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338706, "tid": 2338706, + "ts": 6345942535922.500, "dur": 47.360, + "args": { + "External id": 994886,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21642 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#OptimizersContainer.step", "pid": 2338706, "tid": 2338706, + "ts": 6345942536040.082, "dur": 6804.541, + "args": { + "External id": 994887,"Record function id": 0, "Ev Idx": 21643 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#AdamW.step", "pid": 2338706, "tid": 2338706, + "ts": 6345942536128.427, "dur": 6686.682, + "args": { + "External id": 994888,"Record function id": 0, "Ev Idx": 21644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_add_", "pid": 2338706, "tid": 2338706, + "ts": 6345942537707.984, "dur": 264.451, + "args": { + "External id": 994889,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537730.373, "dur": 1.417, + "args": { + "External id": 994890,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537734.415, "dur": 0.239, + "args": { + "External id": 994891,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537735.221, "dur": 0.237, + "args": { + "External id": 994892,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537735.954, "dur": 0.079, + "args": { + "External id": 994893,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537736.349, "dur": 0.210, + "args": { + "External id": 994894,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537736.975, "dur": 0.214, + "args": { + "External id": 994895,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537737.744, "dur": 0.162, + "args": { + "External id": 994896,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537738.365, "dur": 0.262, + "args": { + "External id": 994897,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537739.205, "dur": 0.112, + "args": { + "External id": 994898,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537739.661, "dur": 0.106, + "args": { + "External id": 994899,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537740.233, "dur": 0.103, + "args": { + "External id": 994900,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537740.800, "dur": 0.079, + "args": { + "External id": 994901,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537741.474, "dur": 0.086, + "args": { + "External id": 994902,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537742.143, "dur": 0.097, + "args": { + "External id": 994903,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537742.657, "dur": 0.375, + "args": { + "External id": 994904,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537743.465, "dur": 0.229, + "args": { + "External id": 994905,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537744.185, "dur": 0.109, + "args": { + "External id": 994906,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537744.817, "dur": 0.255, + "args": { + "External id": 994907,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537745.514, "dur": 0.228, + "args": { + "External id": 994908,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537746.220, "dur": 0.074, + "args": { + "External id": 994909,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537746.724, "dur": 0.191, + "args": { + "External id": 994910,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537747.354, "dur": 0.246, + "args": { + "External id": 994911,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537748.020, "dur": 0.076, + "args": { + "External id": 994912,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537748.547, "dur": 0.077, + "args": { + "External id": 994913,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537749.021, "dur": 0.082, + "args": { + "External id": 994914,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537749.544, "dur": 0.083, + "args": { + "External id": 994915,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537750.063, "dur": 0.079, + "args": { + "External id": 994916,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537750.614, "dur": 0.078, + "args": { + "External id": 994917,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537751.099, "dur": 0.080, + "args": { + "External id": 994918,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537751.613, "dur": 0.068, + "args": { + "External id": 994919,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537752.235, "dur": 0.251, + "args": { + "External id": 994920,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537752.990, "dur": 0.253, + "args": { + "External id": 994921,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537753.724, "dur": 0.225, + "args": { + "External id": 994922,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537754.477, "dur": 0.217, + "args": { + "External id": 994923,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537755.142, "dur": 0.245, + "args": { + "External id": 994924,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537755.807, "dur": 0.059, + "args": { + "External id": 994925,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537756.294, "dur": 0.100, + "args": { + "External id": 994926,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537756.840, "dur": 0.096, + "args": { + "External id": 994927,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537757.387, "dur": 0.100, + "args": { + "External id": 994928,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537757.925, "dur": 0.236, + "args": { + "External id": 994929,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537758.994, "dur": 0.059, + "args": { + "External id": 994930,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537759.512, "dur": 0.076, + "args": { + "External id": 994931,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537760.089, "dur": 0.080, + "args": { + "External id": 994932,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537760.629, "dur": 0.075, + "args": { + "External id": 994933,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537761.274, "dur": 0.073, + "args": { + "External id": 994934,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537761.804, "dur": 0.076, + "args": { + "External id": 994935,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537762.287, "dur": 0.080, + "args": { + "External id": 994936,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537762.774, "dur": 0.066, + "args": { + "External id": 994937,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537763.272, "dur": 0.073, + "args": { + "External id": 994938,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537763.799, "dur": 0.078, + "args": { + "External id": 994939,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537764.307, "dur": 0.068, + "args": { + "External id": 994940,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537764.824, "dur": 0.073, + "args": { + "External id": 994941,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537765.352, "dur": 0.062, + "args": { + "External id": 994942,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537765.821, "dur": 0.077, + "args": { + "External id": 994943,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537766.268, "dur": 0.209, + "args": { + "External id": 994944,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537766.971, "dur": 0.103, + "args": { + "External id": 994945,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537767.463, "dur": 0.104, + "args": { + "External id": 994946,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537767.956, "dur": 0.220, + "args": { + "External id": 994947,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537768.581, "dur": 0.254, + "args": { + "External id": 994948,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537769.302, "dur": 0.079, + "args": { + "External id": 994949,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537769.867, "dur": 0.092, + "args": { + "External id": 994950,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537770.400, "dur": 0.191, + "args": { + "External id": 994951,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537770.975, "dur": 0.214, + "args": { + "External id": 994952,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537771.627, "dur": 0.194, + "args": { + "External id": 994953,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537772.242, "dur": 0.221, + "args": { + "External id": 994954,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537772.907, "dur": 0.076, + "args": { + "External id": 994955,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537773.399, "dur": 0.078, + "args": { + "External id": 994956,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537773.866, "dur": 0.079, + "args": { + "External id": 994957,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537774.394, "dur": 0.073, + "args": { + "External id": 994958,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537774.902, "dur": 0.059, + "args": { + "External id": 994959,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537775.390, "dur": 0.074, + "args": { + "External id": 994960,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537775.891, "dur": 0.082, + "args": { + "External id": 994961,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537776.419, "dur": 0.082, + "args": { + "External id": 994962,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537776.976, "dur": 0.081, + "args": { + "External id": 994963,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537777.447, "dur": 0.079, + "args": { + "External id": 994964,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537778.028, "dur": 0.073, + "args": { + "External id": 994965,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537778.601, "dur": 0.080, + "args": { + "External id": 994966,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537779.174, "dur": 0.214, + "args": { + "External id": 994967,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537779.789, "dur": 0.086, + "args": { + "External id": 994968,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537780.348, "dur": 0.111, + "args": { + "External id": 994969,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537780.930, "dur": 0.077, + "args": { + "External id": 994970,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537781.443, "dur": 0.242, + "args": { + "External id": 994971,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537782.085, "dur": 0.229, + "args": { + "External id": 994972,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537782.774, "dur": 0.077, + "args": { + "External id": 994973,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537783.287, "dur": 0.078, + "args": { + "External id": 994974,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537783.794, "dur": 0.210, + "args": { + "External id": 994975,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537784.549, "dur": 0.087, + "args": { + "External id": 994976,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537785.039, "dur": 0.105, + "args": { + "External id": 994977,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537785.584, "dur": 0.080, + "args": { + "External id": 994978,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537786.094, "dur": 0.080, + "args": { + "External id": 994979,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537786.605, "dur": 0.080, + "args": { + "External id": 994980,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537787.127, "dur": 0.075, + "args": { + "External id": 994981,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537787.641, "dur": 0.076, + "args": { + "External id": 994982,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537788.162, "dur": 0.078, + "args": { + "External id": 994983,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537788.635, "dur": 0.079, + "args": { + "External id": 994984,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537789.188, "dur": 0.068, + "args": { + "External id": 994985,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537789.629, "dur": 0.080, + "args": { + "External id": 994986,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537790.160, "dur": 0.080, + "args": { + "External id": 994987,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537790.745, "dur": 0.062, + "args": { + "External id": 994988,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537791.224, "dur": 0.081, + "args": { + "External id": 994989,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537791.747, "dur": 0.077, + "args": { + "External id": 994990,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537792.269, "dur": 0.078, + "args": { + "External id": 994991,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537792.817, "dur": 0.078, + "args": { + "External id": 994992,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537793.453, "dur": 0.268, + "args": { + "External id": 994993,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537794.172, "dur": 0.202, + "args": { + "External id": 994994,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537794.793, "dur": 0.236, + "args": { + "External id": 994995,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537795.445, "dur": 0.229, + "args": { + "External id": 994996,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537796.034, "dur": 0.076, + "args": { + "External id": 994997,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537796.486, "dur": 0.188, + "args": { + "External id": 994998,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537797.099, "dur": 0.207, + "args": { + "External id": 994999,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537797.743, "dur": 0.212, + "args": { + "External id": 995000,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537798.363, "dur": 0.226, + "args": { + "External id": 995001,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537798.981, "dur": 0.185, + "args": { + "External id": 995002,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537799.760, "dur": 0.065, + "args": { + "External id": 995003,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537800.120, "dur": 0.080, + "args": { + "External id": 995004,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537800.783, "dur": 0.074, + "args": { + "External id": 995005,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537801.342, "dur": 0.079, + "args": { + "External id": 995006,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537801.894, "dur": 0.073, + "args": { + "External id": 995007,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537802.402, "dur": 0.078, + "args": { + "External id": 995008,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537802.946, "dur": 0.080, + "args": { + "External id": 995009,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537803.449, "dur": 0.078, + "args": { + "External id": 995010,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537804.070, "dur": 0.071, + "args": { + "External id": 995011,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537804.585, "dur": 0.080, + "args": { + "External id": 995012,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537805.017, "dur": 0.074, + "args": { + "External id": 995013,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537805.483, "dur": 0.078, + "args": { + "External id": 995014,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537806.001, "dur": 0.073, + "args": { + "External id": 995015,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537806.499, "dur": 0.078, + "args": { + "External id": 995016,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537806.927, "dur": 0.081, + "args": { + "External id": 995017,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537807.511, "dur": 0.073, + "args": { + "External id": 995018,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537808.034, "dur": 0.220, + "args": { + "External id": 995019,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537808.660, "dur": 0.230, + "args": { + "External id": 995020,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537809.315, "dur": 0.079, + "args": { + "External id": 995021,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537809.869, "dur": 0.185, + "args": { + "External id": 995022,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537810.454, "dur": 0.213, + "args": { + "External id": 995023,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537811.161, "dur": 0.217, + "args": { + "External id": 995024,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537811.813, "dur": 0.301, + "args": { + "External id": 995025,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537812.533, "dur": 0.222, + "args": { + "External id": 995026,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537813.198, "dur": 0.231, + "args": { + "External id": 995027,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537813.814, "dur": 0.106, + "args": { + "External id": 995028,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537814.349, "dur": 0.076, + "args": { + "External id": 995029,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537814.825, "dur": 0.207, + "args": { + "External id": 995030,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537815.454, "dur": 0.079, + "args": { + "External id": 995031,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537815.956, "dur": 0.080, + "args": { + "External id": 995032,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537816.423, "dur": 0.078, + "args": { + "External id": 995033,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537816.823, "dur": 0.076, + "args": { + "External id": 995034,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537817.343, "dur": 0.077, + "args": { + "External id": 995035,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537817.925, "dur": 0.076, + "args": { + "External id": 995036,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537818.500, "dur": 0.093, + "args": { + "External id": 995037,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537819.005, "dur": 0.077, + "args": { + "External id": 995038,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537819.476, "dur": 0.073, + "args": { + "External id": 995039,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537819.965, "dur": 0.077, + "args": { + "External id": 995040,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537820.354, "dur": 0.079, + "args": { + "External id": 995041,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537820.816, "dur": 0.073, + "args": { + "External id": 995042,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537821.245, "dur": 0.081, + "args": { + "External id": 995043,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537821.634, "dur": 0.080, + "args": { + "External id": 995044,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537822.309, "dur": 0.078, + "args": { + "External id": 995045,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537822.803, "dur": 0.062, + "args": { + "External id": 995046,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537823.267, "dur": 0.100, + "args": { + "External id": 995047,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537823.970, "dur": 0.206, + "args": { + "External id": 995048,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537824.661, "dur": 0.255, + "args": { + "External id": 995049,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537825.208, "dur": 0.082, + "args": { + "External id": 995050,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537825.829, "dur": 0.251, + "args": { + "External id": 995051,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537826.372, "dur": 0.233, + "args": { + "External id": 995052,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537827.101, "dur": 0.073, + "args": { + "External id": 995053,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537827.464, "dur": 0.195, + "args": { + "External id": 995054,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537828.145, "dur": 0.200, + "args": { + "External id": 995055,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537828.634, "dur": 0.212, + "args": { + "External id": 995056,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537829.328, "dur": 0.233, + "args": { + "External id": 995057,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537829.848, "dur": 0.065, + "args": { + "External id": 995058,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537830.276, "dur": 0.074, + "args": { + "External id": 995059,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537830.638, "dur": 0.065, + "args": { + "External id": 995060,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537831.121, "dur": 0.074, + "args": { + "External id": 995061,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537831.521, "dur": 0.060, + "args": { + "External id": 995062,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537832.020, "dur": 0.071, + "args": { + "External id": 995063,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537832.381, "dur": 0.063, + "args": { + "External id": 995064,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537832.928, "dur": 0.074, + "args": { + "External id": 995065,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537833.288, "dur": 0.346, + "args": { + "External id": 995066,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537834.085, "dur": 0.104, + "args": { + "External id": 995067,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537834.482, "dur": 0.254, + "args": { + "External id": 995068,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537835.434, "dur": 0.073, + "args": { + "External id": 995069,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537835.792, "dur": 0.062, + "args": { + "External id": 995070,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537836.546, "dur": 0.075, + "args": { + "External id": 995071,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537836.911, "dur": 0.063, + "args": { + "External id": 995072,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537837.391, "dur": 0.314, + "args": { + "External id": 995073,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537837.996, "dur": 0.298, + "args": { + "External id": 995074,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537838.738, "dur": 0.316, + "args": { + "External id": 995075,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537839.344, "dur": 0.097, + "args": { + "External id": 995076,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537839.879, "dur": 0.078, + "args": { + "External id": 995077,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537840.251, "dur": 0.061, + "args": { + "External id": 995078,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537840.781, "dur": 0.073, + "args": { + "External id": 995079,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537841.181, "dur": 0.066, + "args": { + "External id": 995080,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537841.734, "dur": 0.075, + "args": { + "External id": 995081,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537842.094, "dur": 0.061, + "args": { + "External id": 995082,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537842.544, "dur": 0.077, + "args": { + "External id": 995083,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537842.919, "dur": 0.062, + "args": { + "External id": 995084,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537843.354, "dur": 0.076, + "args": { + "External id": 995085,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537843.723, "dur": 0.063, + "args": { + "External id": 995086,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537844.195, "dur": 0.080, + "args": { + "External id": 995087,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537844.564, "dur": 0.066, + "args": { + "External id": 995088,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537845.161, "dur": 0.084, + "args": { + "External id": 995089,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537845.535, "dur": 0.067, + "args": { + "External id": 995090,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537845.979, "dur": 0.075, + "args": { + "External id": 995091,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537846.340, "dur": 0.068, + "args": { + "External id": 995092,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537846.848, "dur": 0.074, + "args": { + "External id": 995093,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537847.208, "dur": 0.063, + "args": { + "External id": 995094,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537847.950, "dur": 0.078, + "args": { + "External id": 995095,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537848.321, "dur": 0.067, + "args": { + "External id": 995096,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537848.820, "dur": 0.078, + "args": { + "External id": 995097,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537849.188, "dur": 0.095, + "args": { + "External id": 995098,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537849.617, "dur": 0.077, + "args": { + "External id": 995099,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537849.982, "dur": 0.060, + "args": { + "External id": 995100,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537850.549, "dur": 0.074, + "args": { + "External id": 995101,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537850.906, "dur": 0.061, + "args": { + "External id": 995102,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537851.426, "dur": 0.077, + "args": { + "External id": 995103,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537851.788, "dur": 0.057, + "args": { + "External id": 995104,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537852.338, "dur": 0.077, + "args": { + "External id": 995105,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537852.703, "dur": 0.063, + "args": { + "External id": 995106,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537853.148, "dur": 0.078, + "args": { + "External id": 995107,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537853.514, "dur": 0.057, + "args": { + "External id": 995108,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537853.993, "dur": 0.071, + "args": { + "External id": 995109,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537854.394, "dur": 0.060, + "args": { + "External id": 995110,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537854.924, "dur": 0.077, + "args": { + "External id": 995111,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537855.476, "dur": 0.078, + "args": { + "External id": 995112,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537855.972, "dur": 0.076, + "args": { + "External id": 995113,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537856.360, "dur": 0.059, + "args": { + "External id": 995114,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537856.889, "dur": 0.080, + "args": { + "External id": 995115,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537857.275, "dur": 0.064, + "args": { + "External id": 995116,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537857.654, "dur": 0.077, + "args": { + "External id": 995117,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537858.034, "dur": 0.062, + "args": { + "External id": 995118,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537858.541, "dur": 0.080, + "args": { + "External id": 995119,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537858.924, "dur": 0.057, + "args": { + "External id": 995120,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537859.415, "dur": 0.081, + "args": { + "External id": 995121,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537859.800, "dur": 0.065, + "args": { + "External id": 995122,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537860.252, "dur": 0.078, + "args": { + "External id": 995123,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537860.640, "dur": 0.062, + "args": { + "External id": 995124,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537861.154, "dur": 0.078, + "args": { + "External id": 995125,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537861.541, "dur": 0.063, + "args": { + "External id": 995126,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537862.034, "dur": 0.080, + "args": { + "External id": 995127,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537862.416, "dur": 0.064, + "args": { + "External id": 995128,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537862.906, "dur": 0.307, + "args": { + "External id": 995129,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537863.528, "dur": 0.089, + "args": { + "External id": 995130,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537864.027, "dur": 0.106, + "args": { + "External id": 995131,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537864.429, "dur": 0.070, + "args": { + "External id": 995132,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537864.994, "dur": 0.077, + "args": { + "External id": 995133,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537865.555, "dur": 0.075, + "args": { + "External id": 995134,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537866.202, "dur": 0.283, + "args": { + "External id": 995135,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537866.868, "dur": 0.096, + "args": { + "External id": 995136,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537867.432, "dur": 0.081, + "args": { + "External id": 995137,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537867.808, "dur": 0.063, + "args": { + "External id": 995138,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537868.305, "dur": 0.374, + "args": { + "External id": 995139,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537868.984, "dur": 0.078, + "args": { + "External id": 995140,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537869.509, "dur": 0.075, + "args": { + "External id": 995141,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537869.885, "dur": 0.064, + "args": { + "External id": 995142,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537870.301, "dur": 0.076, + "args": { + "External id": 995143,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537870.680, "dur": 0.065, + "args": { + "External id": 995144,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537871.174, "dur": 0.076, + "args": { + "External id": 995145,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537871.554, "dur": 0.082, + "args": { + "External id": 995146,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537871.985, "dur": 0.076, + "args": { + "External id": 995147,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537872.369, "dur": 0.059, + "args": { + "External id": 995148,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537872.892, "dur": 0.081, + "args": { + "External id": 995149,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537873.270, "dur": 0.064, + "args": { + "External id": 995150,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537873.748, "dur": 0.079, + "args": { + "External id": 995151,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537874.146, "dur": 0.062, + "args": { + "External id": 995152,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537874.638, "dur": 0.083, + "args": { + "External id": 995153,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537875.028, "dur": 0.061, + "args": { + "External id": 995154,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537875.545, "dur": 0.084, + "args": { + "External id": 995155,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537875.936, "dur": 0.063, + "args": { + "External id": 995156,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537876.414, "dur": 0.079, + "args": { + "External id": 995157,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537876.803, "dur": 0.063, + "args": { + "External id": 995158,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537877.373, "dur": 0.061, + "args": { + "External id": 995159,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537877.742, "dur": 0.063, + "args": { + "External id": 995160,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537878.235, "dur": 0.080, + "args": { + "External id": 995161,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338706, "tid": 2338706, + "ts": 6345942537878.625, "dur": 0.063, + "args": { + "External id": 995162,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2338706, "tid": 2338706, + "ts": 6345942538593.748, "dur": 4109.355, + "args": { + "External id": 995163,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "9.3476373648457231e-06", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 21919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2338706, "tid": 2338706, + "ts": 6345942541428.926, "dur": 1056.274, + "args": { + "External id": 995164,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "9.3476373648457231e-06", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 21920 + } + }, + { + "name": "process_name", "ph": "M", "ts": 6345936068625.110, "pid": 2338706, "tid": 0, + "args": { + "name": "python3.12" + } + }, + { + "name": "process_labels", "ph": "M", "ts": 6345936068625.110, "pid": 2338706, "tid": 0, + "args": { + "labels": "CPU" + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 6345936068625.110, "pid": 2338706, "tid": 0, + "args": { + "sort_index": 2338706 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 6345936068625.110, "pid": 2338706, "tid": 2338706, + "args": { + "name": "thread 2338706 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 6345936068625.110, "pid": 2338706, "tid": 2338706, + "args": { + "sort_index": 2338706 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 6345936068625.110, "pid": 2338706, "tid": 2379422, + "args": { + "name": "thread 2379422 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 6345936068625.110, "pid": 2338706, "tid": 2379422, + "args": { + "sort_index": 2379422 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 6345936068625.110, "pid": 2338706, "tid": 2379422, + "args": { + "name": "thread 2379422 (pt_autograd_0)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 6345936068625.110, "pid": 2338706, "tid": 2379422, + "args": { + "sort_index": 2379422 + } + }, + { + "ph": "X", "cat": "Trace", "ts": 6345936068544.138, "dur": 6481062.222, + "pid": "Spans", "tid": "PyTorch Profiler", + "name": "PyTorch Profiler (0)", + "args": { + "Op count": 0 + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 6345936068544.138, + "pid": "Spans", "tid": 0, + "args": { + "sort_index": 536870912 + } + }, + { + "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g", + "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 6345936068544.138 + }, + { + "name": "Record Window End", "ph": "i", "s": "g", + "pid": "", "tid": "", "ts": 6345942655622.491 + } + ], + "traceName": "exp/mtp.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine/profile_trace/iteration_22528/rank0_trace.json", + "displayTimeUnit": "ms", + "baseTimeNanoseconds": 1751410836000000000 +} \ No newline at end of file